<?xml version="1.0" encoding="UTF-8"?>
<SAMPLE_SET xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <SAMPLE alias="SAMEA13537274" accession="ERS11139244">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139244</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537274</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737954_virus.304</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737954.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737954) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561027) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_3_1551__NODE_304_length_45248_cov_14.490735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn -max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_3_1551__NODE_304_length_45248_cov_14.490735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_virus.898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537273" accession="ERS11139243">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139243</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537273</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737953_virus.939</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09730176211453746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_939_length_22069_cov_12.971672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn -max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_939_length_22069_cov_12.971672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737953_virus.939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537294" accession="ERS11139264">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139264</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537294</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_virus.1133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_1133_length_37852_cov_8.884077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn -max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_1133_length_37852_cov_8.884077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.1150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537271" accession="ERS11139241">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139241</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537271</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737953_provirus.369</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.95469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_369_length_42300_cov_4.319541_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_369_length_42300_cov_4.319541_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.2288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537296" accession="ERS11139266">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139266</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537296</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_virus.2571</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_2571_length_19798_cov_6.783429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_2571_length_19798_cov_6.783429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_virus.2571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537270" accession="ERS11139240">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139240</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537270</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737623_provirus.97</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1674559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.78221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561664) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_8_1236__NODE_97_length_41644_cov_5.562947_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737623_bin.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_8_1236__NODE_97_length_41644_cov_5.562947_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737623_provirus.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537269" accession="ERS11139239">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139239</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537269</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737622_virus.469</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.3072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_469_length_43030_cov_14.787652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_469_length_43030_cov_14.787652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.1286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537299" accession="ERS11139269">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139269</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537299</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737967_provirus.356</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7737967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559278) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_356_length_51656_cov_29.746719_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737967_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_356_length_51656_cov_29.746719_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_provirus.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537268" accession="ERS11139238">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139238</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537268</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737622_virus.1538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7737622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06720782038701123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.1354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_1538_length_16195_cov_16.370704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_1538_length_16195_cov_16.370704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.2701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537302" accession="ERS11139272">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139272</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537302</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737967_virus.69</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559278) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_69_length_104417_cov_10.008166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8928571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_69_length_104417_cov_10.008166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738225_virus.120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537267" accession="ERS11139237">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139237</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537267</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737622_provirus.300</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7737622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_300_length_58150_cov_20.380073_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_23_1949__NODE_300_length_58150_cov_20.380073_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537304" accession="ERS11139274">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139274</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537304</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737968_virus.1040</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737968.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.49465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737968) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559460) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_1040_length_23235_cov_3.561232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_1040_length_23235_cov_3.561232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738265_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp900540255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737968_virus.1040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537263" accession="ERS11139233">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139233</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537263</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737618_virus.382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737618.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03912499170499385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.5583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737618) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560891) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_10_1550__NODE_382_length_24986_cov_19.789554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_10_1550__NODE_382_length_24986_cov_19.789554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_provirus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537264" accession="ERS11139234">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139234</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537264</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737619_provirus.187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560055) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_6_2154__NODE_187_length_46067_cov_7.770385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_6_2154__NODE_187_length_46067_cov_7.770385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745867_provirus.369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537265" accession="ERS11139235">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139235</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537265</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737619_virus.2101</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7737619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.31249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560055) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_6_2154__NODE_2101_length_10339_cov_3.924186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_6_2154__NODE_2101_length_10339_cov_3.924186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737619_virus.2101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537283" accession="ERS11139253">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139253</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537283</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737958_provirus.270</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737958.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737958) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561605) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_20_1212__NODE_270_length_26109_cov_8.107714_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_20_1212__NODE_270_length_26109_cov_8.107714_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738236_provirus.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537284" accession="ERS11139254">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139254</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537284</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_provirus.1</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.1524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_1_length_563034_cov_29.663084_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737964_bin.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_1_length_563034_cov_29.663084_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745783_provirus.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537285" accession="ERS11139255">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139255</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537285</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_provirus.468</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_468_length_56279_cov_10.333938_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9629629629629628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_468_length_56279_cov_10.333938_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME248826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_D</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737964_provirus.468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537286" accession="ERS11139256">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139256</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537286</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_virus.165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.35719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_165_length_103581_cov_5.375599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_165_length_103581_cov_5.375599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_virus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537288" accession="ERS11139258">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139258</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537288</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_virus.72</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Tequatrovirus virus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>574.491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_72_length_154530_cov_331.296906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5648535564853556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_72_length_154530_cov_331.296906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0377602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tevenvirinae; Tequatrovirus; unclassified Tequatrovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537290" accession="ERS11139260">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139260</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537290</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737965_virus.1113</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737965.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737965) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561160) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_1113_length_19002_cov_8.814901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_1113_length_19002_cov_8.814901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738552_bin.347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.6318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537291" accession="ERS11139261">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139261</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537291</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737965_virus.366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737965.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737965) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561160) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_366_length_36390_cov_8.491973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_366_length_36390_cov_8.491973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738241_virus.1176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537292" accession="ERS11139262">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139262</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537292</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737965_virus.84</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737965.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.4979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737965) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561160) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_84_length_78157_cov_14.728855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_22_1612__NODE_84_length_78157_cov_14.728855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537305" accession="ERS11139275">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139275</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537305</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737968_virus.821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737968.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.12441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737968) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559460) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_821_length_32872_cov_4.638634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_821_length_32872_cov_4.638634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_virus.590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537307" accession="ERS11139277">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139277</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537307</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737969_provirus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737969.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.9925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737969) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_99_length_160686_cov_12.281547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737969_bin.176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6052631578947368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_99_length_160686_cov_12.281547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_provirus.301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537309" accession="ERS11139279">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139279</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537309</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737969_virus.245</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7737969.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.3187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737969) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_245_length_103236_cov_15.138766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9178082191780822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_245_length_103236_cov_15.138766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0368853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537312" accession="ERS11139282">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139282</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537312</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737970_virus.10219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7737970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2048458149779736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.8093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_10219_length_5395_cov_51.317789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_10219_length_5395_cov_51.317789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.21679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537313" accession="ERS11139283">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139283</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537313</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737970_virus.185</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.9283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_185_length_93506_cov_45.829946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_185_length_93506_cov_45.829946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME280504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900555735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738151_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537315" accession="ERS11139285">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139285</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537315</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737970_virus.852</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>167.436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_852_length_38236_cov_99.396001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_852_length_38236_cov_99.396001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537316" accession="ERS11139286">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139286</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537316</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_provirus.277</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Flavobacterium virus Laban provirus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07497530020756127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_277_length_101251_cov_10.981576_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737971_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_277_length_101251_cov_10.981576_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_provirus.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Labanvirus; Flavobacterium virus Laban</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537318" accession="ERS11139288">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139288</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537318</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.1284</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>7.8965839962564e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1284_length_37005_cov_11.770635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1284_length_37005_cov_11.770635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738152_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900546445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.1061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537319" accession="ERS11139289">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139289</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537319</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.1542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6902524322062463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.4451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1542_length_31927_cov_44.646122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1542_length_31927_cov_44.646122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537324" accession="ERS11139294">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139294</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537324</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.774</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_774_length_54100_cov_8.337949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9577464788732394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_774_length_54100_cov_8.337949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537326" accession="ERS11139296">
    <!-- Provirus (UpViG) sample ERS11139296 / BioSample SAMEA13537326, assembled from run ERR7737972
         and derived from source sample ERS2586141. -->
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139296</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537326</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_provirus.644</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.8937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_644_length_58514_cov_20.645670_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737972_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <!-- Longitude sign corrected: the submitted value was -34.58 (34.58 W), which together with
           latitude -3.55 falls in the Atlantic Ocean and contradicts country "Tanzania" and
           elevation 1287 in this same record. Tanzania lies in the eastern hemisphere, so the
           value is recorded as 34.58 (E). Confirm against source sample ERS2586141 before
           pushing the correction upstream. -->
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_644_length_58514_cov_20.645670_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737622_virus.559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537329" accession="ERS11139299">
    <!-- Independent viral sequence (UViG) sample ERS11139299 / BioSample SAMEA13537329, assembled
         from run ERR7737972 and derived from source sample ERS2586141. -->
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139299</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537329</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_virus.3430</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07381662166094104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.2414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_3430_length_15559_cov_22.684601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <!-- Longitude sign corrected: the submitted value was -34.58 (34.58 W), which together with
           latitude -3.55 falls in the Atlantic Ocean and contradicts country "Tanzania" and
           elevation 1287 in this same record. Tanzania lies in the eastern hemisphere, so the
           value is recorded as 34.58 (E). Confirm against source sample ERS2586141 before
           pushing the correction upstream. -->
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_3430_length_15559_cov_22.684601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_virus.2406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537332" accession="ERS11139302">
    <!-- Provirus (UpViG) sample ERS11139302 / BioSample SAMEA13537332, assembled from run ERR7737973
         and derived from source sample ERS2586347. -->
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139302</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537332</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_provirus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.9552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_71_length_147858_cov_52.541490_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <!-- Longitude sign corrected: the submitted value was -34.58 (34.58 W), which together with
           latitude -3.55 falls in the Atlantic Ocean and contradicts country "Tanzania" and
           elevation 1287 in this same record. Tanzania lies in the eastern hemisphere, so the
           value is recorded as 34.58 (E). Confirm against source sample ERS2586347 before
           pushing the correction upstream. -->
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_71_length_147858_cov_52.541490_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_virus.150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537275" accession="ERS11139245">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139245</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537275</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737954_virus.57</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7737954.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737954) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561027) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_3_1551__NODE_57_length_106893_cov_10.370019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.954954954954955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_3_1551__NODE_57_length_106893_cov_10.370019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537277" accession="ERS11139247">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139247</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537277</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737955_provirus.46</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737955.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737955) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560488) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_46_length_124083_cov_8.303598_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_46_length_124083_cov_8.303598_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.2062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537278" accession="ERS11139248">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139248</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537278</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737955_virus.447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737955.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>450.565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737955) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560488) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_447_length_32519_cov_251.256766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_447_length_32519_cov_251.256766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745876_virus.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537279" accession="ERS11139249">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139249</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537279</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737956_provirus.247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737956.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01486784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.6464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737956) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561101) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_247_length_67800_cov_44.854865_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_247_length_67800_cov_44.854865_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.2294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537280" accession="ERS11139250">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139250</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537280</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737956_virus.140</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7737956.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13997797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737956) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561101) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_140_length_96359_cov_15.273052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_140_length_96359_cov_15.273052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746465_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;s__UBA3388 sp900546465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738149_virus.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537281" accession="ERS11139251">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139251</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537281</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737956_virus.4295</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737956.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875187332894936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.89504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737956) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561101) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_4295_length_7953_cov_4.598019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_4295_length_7953_cov_4.598019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus vestibularis;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737956_virus.4295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537282" accession="ERS11139252">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139252</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537282</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737956_virus.637</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737956.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737956) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561101) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_637_length_35923_cov_7.802963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_8_1596__NODE_637_length_35923_cov_7.802963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737956_virus.637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537293" accession="ERS11139263">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139263</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537293</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_provirus.504</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23526345696647236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_504_length_62311_cov_8.799370_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_504_length_62311_cov_8.799370_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_provirus.514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537295" accession="ERS11139265">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139265</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537295</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_virus.1496</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15566732807002084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_1496_length_31066_cov_21.912388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_1496_length_31066_cov_21.912388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.2368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537297" accession="ERS11139267">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139267</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537297</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_virus.613</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_613_length_55760_cov_13.336458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_613_length_55760_cov_13.336458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.1003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537298" accession="ERS11139268">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139268</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537298</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737966_virus.909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7737966.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.5787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737966) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_909_length_44575_cov_23.228370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_24_2355__NODE_909_length_44575_cov_23.228370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_provirus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537300" accession="ERS11139270">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139270</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537300</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737967_virus.16</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7737967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04793421803331155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>160.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559278) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_16_length_181530_cov_92.136498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_16_length_181530_cov_92.136498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537303" accession="ERS11139273">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139273</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537303</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737968_provirus.335</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737968.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737968) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559460) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_335_length_82601_cov_11.628241_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737968_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_22_2670__NODE_335_length_82601_cov_11.628241_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738171_provirus.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537306" accession="ERS11139276">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139276</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537306</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737969_provirus.580</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7737969.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05270140052539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>135.631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737969) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_580_length_61854_cov_79.565987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_580_length_61854_cov_79.565987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537310" accession="ERS11139280">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139280</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537310</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737969_virus.633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737969.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>159.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737969) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_633_length_58755_cov_94.651062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_633_length_58755_cov_94.651062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737969_virus.633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537314" accession="ERS11139284">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139284</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537314</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737970_virus.35</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.3994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_35_length_186723_cov_18.207076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7380952380952381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_35_length_186723_cov_18.207076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537321" accession="ERS11139291">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139291</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537321</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.2955</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_2955_length_17200_cov_8.545816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_2955_length_17200_cov_8.545816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.3045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537322" accession="ERS11139292">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139292</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537322</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.3867</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_3867_length_13166_cov_7.972343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_3867_length_13166_cov_7.972343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.12313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537325" accession="ERS11139295">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139295</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537325</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_provirus.214</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_214_length_118782_cov_28.183649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.926829268292683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_214_length_118782_cov_28.183649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_provirus.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537327" accession="ERS11139297">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139297</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537327</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_virus.1202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.6961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_1202_length_36074_cov_18.365114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_1202_length_36074_cov_18.365114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738209_virus.1870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537328" accession="ERS11139298">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139298</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537328</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_virus.2108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_2108_length_23178_cov_5.886022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_2108_length_23178_cov_5.886022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_virus.436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537335" accession="ERS11139305">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139305</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537335</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_virus.2717</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02597739359623315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.5114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_2717_length_17895_cov_18.176002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_2717_length_17895_cov_18.176002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.3678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537337" accession="ERS11139307">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139307</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537337</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_virus.863</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_863_length_42303_cov_8.581182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_863_length_42303_cov_8.581182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537341" accession="ERS11139311">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139311</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537341</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737974_virus.429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737974.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.4195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737974) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560365) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_429_length_52332_cov_15.309789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_429_length_52332_cov_15.309789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_virus.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537345" accession="ERS11139315">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139315</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537345</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738141_provirus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738141.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738141) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559284) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_71_length_138888_cov_32.205193_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738141_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_71_length_138888_cov_32.205193_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_provirus.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537348" accession="ERS11139318">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139318</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537348</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738141_virus.864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738141.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.6957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738141) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559284) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_864_length_39422_cov_43.407345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_864_length_39422_cov_43.407345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537349" accession="ERS11139319">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139319</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537349</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738142_provirus.29</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738142.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738142) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559809) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_29_length_173332_cov_9.830562_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738142_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_29_length_173332_cov_9.830562_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738142_provirus.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537351" accession="ERS11139321">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139321</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537351</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738142_virus.364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738142.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7924559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738142) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559809) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_364_length_62345_cov_12.057172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_364_length_62345_cov_12.057172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745371_virus.359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537352" accession="ERS11139322">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139322</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537352</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738142_virus.732</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738142.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9299559471365638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.6221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738142) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559809) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_732_length_42602_cov_42.164821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_732_length_42602_cov_42.164821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738614_provirus.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537355" accession="ERS11139325">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139325</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537355</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_provirus.882</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_882_length_53881_cov_5.454910_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738143_bin.134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_882_length_53881_cov_5.454910_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738143_provirus.882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537358" accession="ERS11139328">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139328</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537358</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_virus.4337</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06434525602217449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_4337_length_13253_cov_8.264966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_4337_length_13253_cov_8.264966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.9015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537359" accession="ERS11139329">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139329</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537359</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_virus.876</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Methanobacterium virus PhiF1 virus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45535389006526233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.7061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_876_length_53981_cov_13.039088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738143_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_876_length_53981_cov_13.039088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746779_virus.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Methanobacterium virus PhiF1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537364" accession="ERS11139334">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139334</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537364</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738145_virus.416</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738145.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>143.079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738145) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559588) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_416_length_70568_cov_87.007022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_416_length_70568_cov_87.007022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738155_virus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537366" accession="ERS11139336">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139336</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537366</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_provirus.1465</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04430120315873834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_1465_length_26456_cov_6.978506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_1465_length_26456_cov_6.978506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME165031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__Collinsella sp900542825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_virus.1159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537367" accession="ERS11139337">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139337</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537367</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_provirus.768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>166.681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_768_length_42998_cov_99.602665_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_768_length_42998_cov_99.602665_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_provirus.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537370" accession="ERS11139340">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139340</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537370</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_virus.789</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.3163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_789_length_42223_cov_26.320647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_789_length_42223_cov_26.320647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738192_bin.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp900543455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745388_virus.1037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537376" accession="ERS11139346">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139346</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537376</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738148_provirus.9</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738148.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738148) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559582) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_9_length_346542_cov_7.864214_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_9_length_346542_cov_7.864214_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738145_provirus.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537377" accession="ERS11139347">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139347</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537377</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738148_virus.2040</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738148.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.97291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738148) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559582) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_2040_length_16981_cov_4.476514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_2040_length_16981_cov_4.476514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.2701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537379" accession="ERS11139349">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139349</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537379</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738149_provirus.232</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738149.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01486784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>91.9772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738149) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561419) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_232_length_67800_cov_55.375323_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_232_length_67800_cov_55.375323_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.2294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537380" accession="ERS11139350">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139350</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537380</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738149_virus.119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738149.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738149) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561419) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_119_length_100792_cov_10.038554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9012345679012346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_119_length_100792_cov_10.038554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737956_virus.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537381" accession="ERS11139351">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139351</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537381</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738149_virus.334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738149.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.7964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738149) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561419) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_334_length_53845_cov_25.938328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_334_length_53845_cov_25.938328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537383" accession="ERS11139353">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139353</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537383</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738149_virus.75</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738149.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.6031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738149) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561419) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_75_length_125971_cov_20.819014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.676923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_75_length_125971_cov_20.819014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira eligens_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738149_virus.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537384" accession="ERS11139354">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139354</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537384</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_provirus.293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_293_length_103270_cov_10.424845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738150_bin.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_293_length_103270_cov_10.424845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537385" accession="ERS11139355">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139355</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537385</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_provirus.690</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09997166296723237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.3784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_690_length_60830_cov_13.157260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738150_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_690_length_60830_cov_13.157260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__CAG-269 sp001916005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537387" accession="ERS11139357">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139357</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537387</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.1368</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1368_length_37680_cov_11.286440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1368_length_37680_cov_11.286440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0281755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537388" accession="ERS11139358">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139358</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537388</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.1782</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1782_length_30873_cov_6.761690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1782_length_30873_cov_6.761690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537389" accession="ERS11139359">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139359</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537389</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.283</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1505.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_283_length_104622_cov_836.446860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_283_length_104622_cov_836.446860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537391" accession="ERS11139361">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139361</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537391</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_769_length_56097_cov_6.318815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6486486486486487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_769_length_56097_cov_6.318815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.1297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537331" accession="ERS11139301">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139301</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537331</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_provirus.167</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>321.785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_167_length_103647_cov_199.156155_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_167_length_103647_cov_199.156155_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537333" accession="ERS11139303">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139303</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537333</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_virus.1528</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_1528_length_28899_cov_11.400285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_1528_length_28899_cov_11.400285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537334" accession="ERS11139304">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139304</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537334</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_virus.1795</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.34627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_1795_length_25405_cov_3.473823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737973_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_1795_length_25405_cov_3.473823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537336" accession="ERS11139306">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139306</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537336</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737973_virus.507</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737973.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737973) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559436) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_507_length_59081_cov_9.683208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_8_2643__NODE_507_length_59081_cov_9.683208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537339" accession="ERS11139309">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139309</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537339</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737974_virus.1102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737974.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737974) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560365) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_1102_length_29421_cov_7.868627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_1102_length_29421_cov_7.868627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537340" accession="ERS11139310">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139310</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537340</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737974_virus.234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737974.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.1343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737974) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560365) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_234_length_73223_cov_25.161950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.34285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_234_length_73223_cov_25.161950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537342" accession="ERS11139312">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139312</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537342</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737974_virus.6741</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7737974.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.28116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737974) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560365) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_6741_length_8800_cov_3.383125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_6741_length_8800_cov_3.383125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_provirus.3519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537344" accession="ERS11139314">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139314</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537344</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738141_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738141.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23584667126223852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.1946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738141) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559284) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_22_length_251009_cov_23.456462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738141_bin.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_22_length_251009_cov_23.456462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_provirus.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537346" accession="ERS11139316">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139316</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537346</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738141_virus.1378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cellulophaga phage Ingeline_8 virus assembled from ERR7738141.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.20618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738141) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559284) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_1378_length_28198_cov_4.703567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_1378_length_28198_cov_4.703567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cellulophaga phage Ingeline_8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537347" accession="ERS11139317">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139317</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537347</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738141_virus.264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738141.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.1006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738141) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559284) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_264_length_77571_cov_54.291532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_21_2512__NODE_264_length_77571_cov_54.291532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_virus.264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537350" accession="ERS11139320">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139320</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537350</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738142_virus.1053</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738142.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738142) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559809) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_1053_length_33067_cov_16.565050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_23_2446__NODE_1053_length_33067_cov_16.565050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.1267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537353" accession="ERS11139323">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139323</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537353</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_provirus.183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_183_length_131500_cov_9.317494_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_183_length_131500_cov_9.317494_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537354" accession="ERS11139324">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139324</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537354</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_provirus.544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11216960352422913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_544_length_72809_cov_5.926374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_544_length_72809_cov_5.926374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_provirus.1616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537393" accession="ERS11139363">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139363</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537393</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_provirus.296</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_296_length_104393_cov_11.022892_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738151_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_296_length_104393_cov_11.022892_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537395" accession="ERS11139365">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139365</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537395</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.1175</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.7672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_1175_length_41921_cov_14.458752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_1175_length_41921_cov_14.458752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UMGS1585;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.3089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537396" accession="ERS11139366">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139366</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537396</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.15671</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04790239424235011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_15671_length_5313_cov_9.050802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_15671_length_5313_cov_9.050802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738552_bin.347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.30465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537397" accession="ERS11139367">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139367</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537397</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.2202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>186.731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_2202_length_25881_cov_115.773330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738151_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_2202_length_25881_cov_115.773330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537398" accession="ERS11139368">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139368</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537398</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.40983</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.2912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_40983_length_2550_cov_30.147594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_40983_length_2550_cov_30.147594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.13523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537399" accession="ERS11139369">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139369</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537399</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.765</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_765_length_57309_cov_6.798120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4230769230769231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_765_length_57309_cov_6.798120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738151_virus.765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537402" accession="ERS11139372">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139372</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537402</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.1057</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.7077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_1057_length_36083_cov_14.851164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_1057_length_36083_cov_14.851164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.1518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537403" accession="ERS11139373">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139373</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537403</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.141</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Poxviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.0902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_141_length_133713_cov_50.655467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_141_length_133713_cov_50.655467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Pokkesviricetes; Chitovirales; Poxviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537404" accession="ERS11139374">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139374</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537404</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.2247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08589947736157162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_2247_length_20034_cov_16.845768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_2247_length_20034_cov_16.845768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537406" accession="ERS11139376">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139376</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537406</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.554</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_554_length_54997_cov_14.069446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_554_length_54997_cov_14.069446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME158868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900549635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537407" accession="ERS11139377">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139377</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537407</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_754_length_45436_cov_9.234397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738152_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_754_length_45436_cov_9.234397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537409" accession="ERS11139379">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139379</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537409</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738153_provirus.208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Lambdavirus provirus assembled from ERR7738153.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11321451267193008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>142.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738153) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559832) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_17_2487__NODE_208_length_76991_cov_85.522766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738153_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_17_2487__NODE_208_length_76991_cov_85.522766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0300980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus; unclassified Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537411" accession="ERS11139381">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139381</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537411</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738154_provirus.18</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae provirus assembled from ERR7738154.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.8363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738154) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561215) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_18_length_166231_cov_15.689577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738154_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_18_length_166231_cov_15.689577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537414" accession="ERS11139384">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139384</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537414</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738155_provirus.175</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738155.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.8472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738155) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560511) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_175_length_90103_cov_19.747484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738155_bin.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9655172413793104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_175_length_90103_cov_19.747484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0337868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537415" accession="ERS11139385">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139385</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537415</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738155_virus.1147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738155.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.48729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738155) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560511) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_1147_length_22016_cov_4.599617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_1147_length_22016_cov_4.599617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738155_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Megasphaeraceae;g__Megasphaera;s__Megasphaera sp002431345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_provirus.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537416" accession="ERS11139386">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139386</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537416</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738155_virus.577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738155.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.83597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738155) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560511) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_577_length_39778_cov_5.498275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_577_length_39778_cov_5.498275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537417" accession="ERS11139387">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139387</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537417</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738155_virus.792</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738155.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738155) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560511) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_792_length_30886_cov_24.003765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_1_1938__NODE_792_length_30886_cov_24.003765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_provirus.710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537418" accession="ERS11139388">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139388</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537418</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738156_provirus.223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738156.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.839977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>149.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738156) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559605) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_223_length_80159_cov_95.324580_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_223_length_80159_cov_95.324580_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537420" accession="ERS11139390">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139390</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537420</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738156_virus.372</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738156.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738156) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559605) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_372_length_63199_cov_6.960267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_372_length_63199_cov_6.960267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738156_virus.372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537421" accession="ERS11139391">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139391</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537421</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738156_virus.662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738156.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738156) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559605) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_662_length_45098_cov_6.209058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_662_length_45098_cov_6.209058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537423" accession="ERS11139393">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139393</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537423</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_provirus.438</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.3901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_438_length_74695_cov_18.419470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6129032258064516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_438_length_74695_cov_18.419470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_provirus.301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537424" accession="ERS11139394">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139394</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537424</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_provirus.808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh provirus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_808_length_50338_cov_15.875768_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738157_bin.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_808_length_50338_cov_15.875768_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745427_virus.461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537425" accession="ERS11139395">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139395</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537425</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_virus.1299</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_1299_length_36429_cov_15.104203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738157_bin.126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_1299_length_36429_cov_15.104203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737974_provirus.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537426" accession="ERS11139396">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139396</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537426</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_virus.2210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05501014145734777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.13712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_2210_length_23159_cov_4.074647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_2210_length_23159_cov_4.074647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UMGS1585;s__UMGS1585 sp900553205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.3089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537427" accession="ERS11139397">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139397</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537427</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_virus.585</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>124.804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_585_length_62483_cov_72.556405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_585_length_62483_cov_72.556405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738182_virus.387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537428" accession="ERS11139398">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139398</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537428</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_provirus.1139</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus provirus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1139_length_44303_cov_5.889522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1139_length_44303_cov_5.889522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_provirus.1139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537432" accession="ERS11139402">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139402</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537432</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_virus.1517</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07665282975512436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.94045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1517_length_36756_cov_5.086207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1517_length_36756_cov_5.086207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_virus.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537434" accession="ERS11139404">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139404</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537434</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_virus.587</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_587_length_65261_cov_13.029562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_587_length_65261_cov_13.029562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_virus.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537436" accession="ERS11139406">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139406</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537436</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738159_provirus.4</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7738159.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.1795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738159) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559470) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_24_2672__NODE_4_length_481924_cov_29.869463_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738159_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_24_2672__NODE_4_length_481924_cov_29.869463_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737968_provirus.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537437" accession="ERS11139407">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139407</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537437</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738159_virus.280</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738159.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738159) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559470) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_24_2672__NODE_280_length_86031_cov_8.738407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_24_2672__NODE_280_length_86031_cov_8.738407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738201_virus.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537438" accession="ERS11139408">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139408</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537438</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_provirus.1360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.0583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_1360_length_34637_cov_20.761198_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_1360_length_34637_cov_20.761198_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_provirus.2857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537439" accession="ERS11139409">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139409</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537439</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_provirus.452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2702991463254911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_452_length_68009_cov_6.557661_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738160_bin.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_452_length_68009_cov_6.557661_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738160_provirus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537440" accession="ERS11139410">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139410</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537440</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_provirus.880</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05270140052539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>164.061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_880_length_45836_cov_93.558579_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_880_length_45836_cov_93.558579_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537441" accession="ERS11139411">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139411</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537441</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_virus.1839</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.2012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_1839_length_27053_cov_3.950030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738160_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_1839_length_27053_cov_3.950030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738160_virus.1839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537266" accession="ERS11139236">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139236</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537266</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737621_provirus.1</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1422.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560434) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_16_1853__NODE_1_length_681967_cov_842.858445_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737621_bin.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_16_1853__NODE_1_length_681967_cov_842.858445_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737621_provirus.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537272" accession="ERS11139242">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139242</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537272</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737953_virus.234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_234_length_55319_cov_11.454763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_H_7_1595__NODE_234_length_55319_cov_11.454763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737953_virus.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537287" accession="ERS11139257">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139257</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537287</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_virus.32</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_32_length_210607_cov_15.926334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5576923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_32_length_210607_cov_15.926334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737964_virus.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537289" accession="ERS11139259">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139259</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537289</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737964_virus.891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737964.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.4412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737964) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_891_length_37856_cov_24.415892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9032258064516128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_19_1040__NODE_891_length_37856_cov_24.415892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Bariatricus;s__Bariatricus comes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737964_virus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537301" accession="ERS11139271">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139271</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537301</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737967_virus.462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.6261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559278) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_462_length_44859_cov_23.641575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_22_2578__NODE_462_length_44859_cov_23.641575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737967_virus.462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537308" accession="ERS11139278">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139278</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537308</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737969_virus.1440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737969.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737969) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_1440_length_33074_cov_7.420038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_9_2667__NODE_1440_length_33074_cov_7.420038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738178_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-465;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737969_virus.1440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537311" accession="ERS11139281">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139281</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537311</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737970_provirus.296</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.31702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_296_length_72613_cov_4.765813_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8837209302325582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_11_1558__NODE_296_length_72613_cov_4.765813_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME041878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__CAG-1024;s__CAG-1024 sp000432015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_provirus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537320" accession="ERS11139290">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139290</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537320</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.8092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_182_length_126191_cov_29.115895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6833333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_182_length_126191_cov_29.115895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745924_bin.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;s__F23-B02 sp900556535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537323" accession="ERS11139293">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139293</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537323</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.582</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_582_length_65697_cov_8.903459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5217391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_582_length_65697_cov_8.903459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537338" accession="ERS11139308">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139308</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537338</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737974_provirus.460</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7737974.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737974) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560365) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_460_length_50135_cov_8.736845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737974_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_17_1798__NODE_460_length_50135_cov_8.736845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737974_provirus.460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537343" accession="ERS11139313">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139313</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537343</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738140_provirus.55</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2753052220829365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.4142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560325) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_1_1802__NODE_55_length_140753_cov_22.322585_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738140_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_1_1802__NODE_55_length_140753_cov_22.322585_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME151566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Ruminococcus_B;s__Ruminococcus_B gnavus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738140_provirus.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537356" accession="ERS11139326">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139326</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537356</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_virus.1402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_1402_length_38195_cov_5.831943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_1402_length_38195_cov_5.831943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745687_bin.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6382;s__UBA6382 sp900557555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738143_virus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537360" accession="ERS11139330">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139330</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537360</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738144_virus.159</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>141.523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567441) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_1_THA1057JZ__NODE_159_length_78420_cov_84.249952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4411764705882353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_1_THA1057JZ__NODE_159_length_78420_cov_84.249952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537361" accession="ERS11139331">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139331</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537361</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738144_virus.364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567441) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_1_THA1057JZ__NODE_364_length_42505_cov_11.002852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_1_THA1057JZ__NODE_364_length_42505_cov_11.002852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537371" accession="ERS11139341">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139341</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537371</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_virus.919</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_919_length_37948_cov_11.482876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738146_bin.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4482758620689655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_919_length_37948_cov_11.482876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737972_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__RUG115;s__RUG115 sp900066395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537373" accession="ERS11139343">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139343</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537373</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738147_virus.15</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738147.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00141699478838352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.6542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738147) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559941) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_15_length_187820_cov_50.021977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_15_length_187820_cov_50.021977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537375" accession="ERS11139345">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139345</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537375</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738147_virus.86</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738147.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.5791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738147) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559941) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_86_length_105151_cov_21.511687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43333333333333335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_86_length_105151_cov_21.511687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537382" accession="ERS11139352">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139352</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537382</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738149_virus.52</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738149.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23891699478838352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.2765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738149) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561419) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_52_length_156277_cov_38.142574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5853658536585366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_10_1510__NODE_52_length_156277_cov_38.142574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738149_virus.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537386" accession="ERS11139356">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139356</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537386</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.1064</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1064_length_44774_cov_7.528246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_1064_length_44774_cov_7.528246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_virus.1064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537394" accession="ERS11139364">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139364</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537394</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_virus.1005</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.77976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_1005_length_47821_cov_4.790382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_1005_length_47821_cov_4.790382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-628;s__CAG-628 sp000438415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738151_virus.1005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537400" accession="ERS11139370">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139370</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537400</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_provirus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_145_length_131934_cov_14.075885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5853658536585366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_145_length_131934_cov_14.075885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_provirus.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537401" accession="ERS11139371">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139371</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537401</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_provirus.366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus provirus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.9963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_366_length_73332_cov_26.136509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738152_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_366_length_73332_cov_26.136509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_provirus.366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537408" accession="ERS11139378">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139378</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537408</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.894</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_894_length_40459_cov_15.373904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_894_length_40459_cov_15.373904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537410" accession="ERS11139380">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139380</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537410</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738153_virus.425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738153.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>154.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738153) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559832) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_17_2487__NODE_425_length_36766_cov_87.779852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_17_2487__NODE_425_length_36766_cov_87.779852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738153_virus.425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537413" accession="ERS11139383">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139383</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537413</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738154_virus.77</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738154.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738154) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561215) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_77_length_102688_cov_12.310600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_77_length_102688_cov_12.310600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738154_virus.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537422" accession="ERS11139392">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139392</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537422</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738157_provirus.110</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738157.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738157) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_110_length_150314_cov_10.581621_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738157_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_19_2669__NODE_110_length_150314_cov_10.581621_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738157_provirus.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537429" accession="ERS11139399">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139399</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537429</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_provirus.314</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_314_length_90437_cov_9.846824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9310344827586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_314_length_90437_cov_9.846824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_provirus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537433" accession="ERS11139403">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139403</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537433</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_virus.269</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_269_length_96741_cov_16.876738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_269_length_96741_cov_16.876738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_virus.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537435" accession="ERS11139405">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139405</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537435</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_virus.918</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_918_length_49800_cov_12.311023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_918_length_49800_cov_12.311023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_virus.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537442" accession="ERS11139412">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139412</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537442</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_virus.3410</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.00972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_3410_length_16402_cov_4.554487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_3410_length_16402_cov_4.554487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900768995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738160_virus.3410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537443" accession="ERS11139413">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139413</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537443</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738160_virus.768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738160.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738160) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559708) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_768_length_50472_cov_8.085961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_2_2343__NODE_768_length_50472_cov_8.085961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738160_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537444" accession="ERS11139414">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139414</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537444</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738161_provirus.127</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738161.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.4231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738161) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560062) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_127_length_113956_cov_29.679739_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738161_bin.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_127_length_113956_cov_29.679739_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537445" accession="ERS11139415">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139415</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537445</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738161_provirus.717</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738161.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.7969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738161) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560062) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_717_length_42434_cov_16.962958_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738161_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_717_length_42434_cov_16.962958_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537446" accession="ERS11139416">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139416</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537446</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738161_virus.173</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738161.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>212.743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738161) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560062) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_173_length_97887_cov_122.101247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_173_length_97887_cov_122.101247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_virus.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537448" accession="ERS11139418">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139418</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537448</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738161_virus.654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738161.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.5012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738161) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560062) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_654_length_44301_cov_11.739372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_654_length_44301_cov_11.739372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_virus.436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537449" accession="ERS11139419">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139419</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537449</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_provirus.1789</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09512584798925876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.65886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1789_length_35121_cov_5.225459_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1789_length_35121_cov_5.225459_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0328474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537450" accession="ERS11139420">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139420</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537450</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_provirus.62</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_62_length_191983_cov_6.534772_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_62_length_191983_cov_6.534772_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_provirus.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537451" accession="ERS11139421">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139421</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537451</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.1132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1132_length_47595_cov_7.167410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1132_length_47595_cov_7.167410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UBA3789;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_virus.968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537452" accession="ERS11139422">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139422</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537452</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.1476</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1476_length_39926_cov_7.885844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1476_length_39926_cov_7.885844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537454" accession="ERS11139424">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139424</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537454</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.2659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.20783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_2659_length_26113_cov_5.555308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_2659_length_26113_cov_5.555308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537455" accession="ERS11139425">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139425</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537455</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.0008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_342_length_92480_cov_20.643074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_342_length_92480_cov_20.643074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537456" accession="ERS11139426">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139426</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537456</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.4694</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_4694_length_16150_cov_9.721209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_4694_length_16150_cov_9.721209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.6231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537457" accession="ERS11139427">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139427</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537457</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.709</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.10895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_709_length_62392_cov_5.124529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5405405405405406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_709_length_62392_cov_5.124529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537458" accession="ERS11139428">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139428</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537458</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.9650</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.86949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_9650_length_8318_cov_3.929256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_9650_length_8318_cov_3.929256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.9650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537459" accession="ERS11139429">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139429</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537459</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738163_provirus.201</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738163.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>197.531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738163) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_201_length_101965_cov_152.782339_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_201_length_101965_cov_152.782339_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.3646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537460" accession="ERS11139430">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139430</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537460</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738163_provirus.955</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus provirus assembled from ERR7738163.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.88592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738163) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_955_length_44372_cov_5.079038_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_955_length_44372_cov_5.079038_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-1000;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.2677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537461" accession="ERS11139431">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139431</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537461</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738163_virus.17</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738163.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.2563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738163) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_17_length_267745_cov_20.190938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8412698412698413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_17_length_267745_cov_20.190938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537463" accession="ERS11139433">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139433</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537463</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738164_provirus.110</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738164.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738164) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561040) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_110_length_65240_cov_6.063011_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_110_length_65240_cov_6.063011_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738164_provirus.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537464" accession="ERS11139434">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139434</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537464</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738164_virus.1817</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738164.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.0937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738164) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561040) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_1817_length_13774_cov_11.362862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_1817_length_13774_cov_11.362862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738164_virus.1817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537317" accession="ERS11139287">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139287</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537317</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737971_virus.1080</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7737971.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.8431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737971) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1080_length_42361_cov_26.958140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_5_2654__NODE_1080_length_42361_cov_26.958140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746398_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;s__Oribacterium sp900772695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537357" accession="ERS11139327">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139327</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537357</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738143_virus.2655</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bdellovibrio phage phi1422 virus assembled from ERR7738143.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12261836275922892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.17195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738143) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559842) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_2655_length_21947_cov_3.836671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738143_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_3_2480__NODE_2655_length_21947_cov_3.836671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.2404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Bdellovibrio phage phi1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537362" accession="ERS11139332">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139332</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537362</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738145_provirus.335</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738145.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738145) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559588) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_335_length_82112_cov_6.260182_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_335_length_82112_cov_6.260182_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537363" accession="ERS11139333">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139333</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537363</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738145_virus.1338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738145.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738145) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559588) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_1338_length_29662_cov_8.948623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738145_bin.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_1338_length_29662_cov_8.948623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME029635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-417;s__CAG-417 sp000432835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.1296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537365" accession="ERS11139335">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139335</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537365</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738145_virus.800</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738145.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.6269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738145) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559588) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_800_length_45056_cov_21.645168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738145_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_24_2316__NODE_800_length_45056_cov_21.645168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.1414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537368" accession="ERS11139338">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139338</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537368</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_virus.1603</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vegasvirus virus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.79443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_1603_length_24876_cov_3.789024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_1603_length_24876_cov_3.789024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME266163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira sp900316325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.1603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Gochnauervirinae; Vegasvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537374" accession="ERS11139344">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139344</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537374</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738147_virus.339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738147.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738147) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559941) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_339_length_56156_cov_8.617415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_339_length_56156_cov_8.617415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.1035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537378" accession="ERS11139348">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139348</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537378</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738148_virus.804</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738148.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738148) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559582) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_804_length_41411_cov_7.292858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_18_2314__NODE_804_length_41411_cov_7.292858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738169_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537390" accession="ERS11139360">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139360</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537390</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738150_virus.429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738150.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.6527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738150) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_429_length_83754_cov_29.753050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_10_2221__NODE_429_length_83754_cov_29.753050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_virus.429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537392" accession="ERS11139362">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139362</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537392</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738151_provirus.114</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738151.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738151) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560211) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_114_length_165305_cov_6.462694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_16_2233__NODE_114_length_165305_cov_6.462694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_virus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537412" accession="ERS11139382">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139382</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537412</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738154_virus.1244</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738154.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.0154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738154) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561215) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_1244_length_23841_cov_31.406497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_1_1600__NODE_1244_length_23841_cov_31.406497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.2254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537419" accession="ERS11139389">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139389</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537419</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738156_provirus.700</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738156.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738156) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559605) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_700_length_43357_cov_11.300601_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738156_bin.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_10_2310__NODE_700_length_43357_cov_11.300601_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537430" accession="ERS11139400">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139400</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537430</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_provirus.527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.6744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_527_length_69202_cov_22.280260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5757575757575758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_527_length_69202_cov_22.280260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537261" accession="ERS11139231">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139231</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537261</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737607_virus.18</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7737607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_6_1557__NODE_18_length_59667_cov_19.689965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4893617021276595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_6_1557__NODE_18_length_59667_cov_19.689965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737607_virus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537262" accession="ERS11139232">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139232</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537262</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737617_virus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7737617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.5137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560303) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_19_1779__NODE_162_length_59640_cov_32.245589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_19_1779__NODE_162_length_59640_cov_32.245589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737617_virus.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537276" accession="ERS11139246">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139246</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537276</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737955_provirus.147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7737955.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737955) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560488) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_147_length_67774_cov_7.743726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7737955_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_F_H_7_1932__NODE_147_length_67774_cov_7.743726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737955_provirus.147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537330" accession="ERS11139300">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139300</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537330</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7737972_virus.70</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7737972.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7737972) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560076) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_70_length_217453_cov_13.820661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_8_2155__NODE_70_length_217453_cov_13.820661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737972_virus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537369" accession="ERS11139339">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139339</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537369</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738146_virus.35</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738146.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01328965839962564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>106.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738146) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_35_length_192140_cov_62.729386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5423728813559322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_24_2678__NODE_35_length_192140_cov_62.729386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537405" accession="ERS11139375">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139375</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537405</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738152_virus.36</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738152.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>198.477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738152) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561091) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_36_length_238499_cov_101.425292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4696969696969697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_1_1586__NODE_36_length_238499_cov_101.425292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537431" accession="ERS11139401">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139401</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537431</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738158_virus.1296</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738158.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>294.568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738158) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1296_length_40453_cov_167.289900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_18_2193__NODE_1296_length_40453_cov_167.289900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_virus.1296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537447" accession="ERS11139417">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139417</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537447</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738161_virus.317</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738161.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>103.295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738161) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560062) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_317_length_68956_cov_58.269894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738161_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_10_2156__NODE_317_length_68956_cov_58.269894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_virus.317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537453" accession="ERS11139423">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139423</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537453</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738162_virus.1734</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738162.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.7801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738162) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560186) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1734_length_35883_cov_30.318354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_8_2217__NODE_1734_length_35883_cov_30.318354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__SFEL01;s__SFEL01 sp004557245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.1393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537462" accession="ERS11139432">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139432</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537462</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738163_virus.607</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738163.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738163) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_607_length_59834_cov_9.273022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_10_2185__NODE_607_length_59834_cov_9.273022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537465" accession="ERS11139435">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139435</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537465</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738164_virus.45</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7738164.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10135789431064383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738164) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561040) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_45_length_95683_cov_6.324352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9382716049382716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_15_1568__NODE_45_length_95683_cov_6.324352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537466" accession="ERS11139436">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139436</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537466</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738165_provirus.293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738165.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>234.517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738165) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559683) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_293_length_76219_cov_141.046820_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_293_length_76219_cov_141.046820_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537467" accession="ERS11139437">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139437</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537467</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738165_provirus.75</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738165.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>219.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738165) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559683) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_75_length_129618_cov_129.412881_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_75_length_129618_cov_129.412881_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537468" accession="ERS11139438">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139438</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537468</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738165_virus.143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738165.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738165) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559683) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_143_length_103862_cov_13.319950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_143_length_103862_cov_13.319950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745410_virus.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537469" accession="ERS11139439">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139439</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537469</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738165_virus.316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7738165.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06303277487972782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.0031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738165) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559683) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_316_length_73969_cov_18.264305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_24_2342__NODE_316_length_73969_cov_18.264305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738189_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537470" accession="ERS11139440">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139440</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537470</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_provirus.121</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_121_length_174784_cov_8.223082_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738166_bin.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_121_length_174784_cov_8.223082_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A sp000436615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_provirus.121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537471" accession="ERS11139441">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139441</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537471</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_provirus.295</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Uetakevirus provirus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.39735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_295_length_116149_cov_5.433653_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738166_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5192307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_295_length_116149_cov_5.433653_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_provirus.295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Uetakevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537472" accession="ERS11139442">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139442</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537472</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.1011</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1011_length_55614_cov_11.502152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8979591836734694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1011_length_55614_cov_11.502152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_virus.1011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537473" accession="ERS11139443">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139443</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537473</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.1516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.6946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1516_length_40995_cov_43.899237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1516_length_40995_cov_43.899237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_virus.1516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537474" accession="ERS11139444">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139444</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537474</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.1669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1669_length_37732_cov_14.458903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6176470588235294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_1669_length_37732_cov_14.458903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0341321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537475" accession="ERS11139445">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139445</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537475</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.2779</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_2779_length_23669_cov_12.829561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_2779_length_23669_cov_12.829561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__UBA644;s__UBA644 sp900547165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_provirus.103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537476" accession="ERS11139446">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139446</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537476</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.4516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.19837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_4516_length_15465_cov_4.132376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_4516_length_15465_cov_4.132376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_virus.4516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537477" accession="ERS11139447">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139447</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537477</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738166_virus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738166.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.9212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738166) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560179) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_850_length_61675_cov_21.902919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_24_2201__NODE_850_length_61675_cov_21.902919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537478" accession="ERS11139448">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139448</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537478</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_provirus.263</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.8626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_263_length_109236_cov_30.475059_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738167_bin.176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_263_length_109236_cov_30.475059_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537479" accession="ERS11139449">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139449</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537479</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_provirus.761</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_761_length_59458_cov_6.284771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738167_bin.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_761_length_59458_cov_6.284771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738167_provirus.761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537480" accession="ERS11139450">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139450</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537480</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_virus.1198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_1198_length_43406_cov_7.406864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_1198_length_43406_cov_7.406864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__RUG472;s__RUG472 sp900545265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738167_virus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537481" accession="ERS11139451">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139451</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537481</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_virus.1716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.38275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_1716_length_33681_cov_5.212147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_1716_length_33681_cov_5.212147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537482" accession="ERS11139452">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139452</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537482</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_virus.2453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_2453_length_25732_cov_8.098187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_2453_length_25732_cov_8.098187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537483" accession="ERS11139453">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139453</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537483</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_virus.4178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04343334073279703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_4178_length_16496_cov_18.101285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_4178_length_16496_cov_18.101285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537484" accession="ERS11139454">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139454</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537484</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738167_virus.6656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738167.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>230.488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738167) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559455) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_6656_length_10867_cov_135.223262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_9_2646__NODE_6656_length_10867_cov_135.223262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.12584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537485" accession="ERS11139455">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139455</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537485</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738168_provirus.636</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7738168.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14888717346443942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738168) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_636_length_50443_cov_6.059385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738168_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_636_length_50443_cov_6.059385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537486" accession="ERS11139456">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139456</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537486</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738168_virus.1364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738168.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738168) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_1364_length_28853_cov_8.746942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_1364_length_28853_cov_8.746942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738581_bin.271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745408_virus.974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537487" accession="ERS11139457">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139457</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537487</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738168_virus.2772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738168.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738168) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_2772_length_17140_cov_8.516849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_2772_length_17140_cov_8.516849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.4145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537488" accession="ERS11139458">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139458</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537488</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738168_virus.61</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738168.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06433820478765558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738168) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_61_length_181164_cov_6.580715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_61_length_181164_cov_6.580715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides sp900549585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537489" accession="ERS11139459">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139459</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537489</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738168_virus.851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738168.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.1916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738168) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_851_length_40662_cov_20.454823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_11_1511__NODE_851_length_40662_cov_20.454823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.1078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537490" accession="ERS11139460">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139460</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537490</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738169_provirus.30</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738169.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738169) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_30_length_237824_cov_10.176549_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_30_length_237824_cov_10.176549_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537491" accession="ERS11139461">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139461</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537491</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738169_provirus.944</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738169.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.29767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738169) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_944_length_36049_cov_4.635883_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_944_length_36049_cov_4.635883_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-245;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.2042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537492" accession="ERS11139462">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139462</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537492</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738169_virus.392</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738169.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.9325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738169) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_392_length_70568_cov_43.207941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_392_length_70568_cov_43.207941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738155_virus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537493" accession="ERS11139463">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139463</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537493</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738169_virus.6535</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Microviridae virus assembled from ERR7738169.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.76676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738169) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_6535_length_5432_cov_4.025210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_15_2497__NODE_6535_length_5432_cov_4.025210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.16257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537494" accession="ERS11139464">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139464</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537494</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738170_provirus.27</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738170.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>311.497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738170) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_27_length_239812_cov_177.582743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738170_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_27_length_239812_cov_177.582743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738612_bin.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__WCHB1-69;g__F23-D06;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_provirus.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537495" accession="ERS11139465">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139465</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537495</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738170_provirus.705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738170.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.48661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738170) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_705_length_56096_cov_5.401096_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_705_length_56096_cov_5.401096_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900549865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_provirus.660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537496" accession="ERS11139466">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139466</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537496</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738170_virus.1357</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr131_1 virus assembled from ERR7738170.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.4474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738170) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_1357_length_36859_cov_11.853869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_1357_length_36859_cov_11.853869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537498" accession="ERS11139468">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139468</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537498</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738170_virus.823</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738170.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04730176211453744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738170) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_823_length_50612_cov_5.825745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8529411764705882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_823_length_50612_cov_5.825745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_virus.1007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537499" accession="ERS11139469">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139469</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537499</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738171_provirus.359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738171.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738171) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_359_length_78261_cov_11.081922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738171_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_359_length_78261_cov_11.081922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__Mediterraneibacter lactaris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737968_provirus.496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537500" accession="ERS11139470">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139470</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537500</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738171_virus.1176</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738171.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1128166006446914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738171) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_1176_length_23550_cov_9.972607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_1176_length_23550_cov_9.972607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.1602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537501" accession="ERS11139471">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139471</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537501</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738171_virus.799</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738171.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738171) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_799_length_36503_cov_11.299127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_6_2676__NODE_799_length_36503_cov_11.299127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0325958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537502" accession="ERS11139472">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139472</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537502</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_provirus.329</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05270140052539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>142.664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_329_length_81574_cov_87.458446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_329_length_81574_cov_87.458446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537503" accession="ERS11139473">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139473</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537503</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_virus.1008</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_1008_length_42297_cov_6.176646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_1008_length_42297_cov_6.176646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537504" accession="ERS11139474">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139474</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537504</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_virus.160</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.8967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_160_length_110118_cov_21.802728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.627906976744186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_160_length_110118_cov_21.802728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R sp000436835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738172_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537506" accession="ERS11139476">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139476</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537506</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_virus.525</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6824339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>265.253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_525_length_63351_cov_148.943847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_525_length_63351_cov_148.943847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537507" accession="ERS11139477">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139477</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537507</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_virus.856</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.9599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_856_length_46842_cov_20.578403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_856_length_46842_cov_20.578403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537508" accession="ERS11139478">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139478</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537508</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738173_provirus.163</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738173.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1674559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.6433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738173) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_163_length_98526_cov_40.229591_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738173_bin.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_163_length_98526_cov_40.229591_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738246_provirus.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537509" accession="ERS11139479">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139479</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537509</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738173_provirus.98</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738173.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15495594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>215.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738173) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_98_length_124484_cov_124.072785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_98_length_124484_cov_124.072785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738153_virus.426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537510" accession="ERS11139480">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139480</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537510</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738173_virus.522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738173.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>189.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738173) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_522_length_45203_cov_107.837898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_522_length_45203_cov_107.837898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738173_virus.522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537512" accession="ERS11139482">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139482</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537512</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738174_provirus.83</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738174.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738174) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560999) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_83_length_180035_cov_6.616438_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738174_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_83_length_180035_cov_6.616438_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738559_bin.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_provirus.1362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537513" accession="ERS11139483">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139483</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537513</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738174_virus.1135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738174.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.3623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738174) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560999) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_1135_length_38728_cov_28.199658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_1135_length_38728_cov_28.199658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737954_virus.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537514" accession="ERS11139484">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139484</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537514</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738174_virus.1277</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738174.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738174) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560999) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_1277_length_35183_cov_7.250812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_1277_length_35183_cov_7.250812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_virus.1277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537515" accession="ERS11139485">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139485</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537515</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738174_virus.259</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738174.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.9432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738174) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560999) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_259_length_100221_cov_26.571257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_259_length_100221_cov_26.571257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537516" accession="ERS11139486">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139486</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537516</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738174_virus.659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738174.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738174) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560999) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_659_length_56188_cov_7.245210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_12_1563__NODE_659_length_56188_cov_7.245210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_virus.659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537517" accession="ERS11139487">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139487</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537517</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738175_provirus.14</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738175.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.1822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738175) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_24_1254__NODE_14_length_335182_cov_48.927566_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738175_bin.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9666666666666668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_24_1254__NODE_14_length_335182_cov_48.927566_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738226_virus.423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537518" accession="ERS11139488">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139488</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537518</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738175_virus.552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738175.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5399779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.3933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738175) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_24_1254__NODE_552_length_39029_cov_40.394383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6136363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_24_1254__NODE_552_length_39029_cov_40.394383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0271284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537519" accession="ERS11139489">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139489</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537519</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_provirus.2291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02736784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2291_length_24661_cov_8.930076_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738176_bin.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2291_length_24661_cov_8.930076_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger variabilis_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745556_provirus.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537521" accession="ERS11139491">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139491</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537521</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.1391</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03469162995594714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.69798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_1391_length_34148_cov_4.947345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_1391_length_34148_cov_4.947345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.1391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537522" accession="ERS11139492">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139492</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537522</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.2033</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Teubervirus virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09419107100014808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2033_length_26918_cov_13.710741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2033_length_26918_cov_13.710741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Teubervirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537523" accession="ERS11139493">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139493</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537523</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.2878</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16375920696193652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2878_length_21159_cov_21.853382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_2878_length_21159_cov_21.853382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.3747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537524" accession="ERS11139494">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139494</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537524</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.4194</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.32103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_4194_length_16211_cov_3.646275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_4194_length_16211_cov_3.646275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_provirus.1546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537525" accession="ERS11139495">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139495</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537525</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.583</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.8037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_583_length_55609_cov_25.826442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_583_length_55609_cov_25.826442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537527" accession="ERS11139497">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139497</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537527</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_provirus.229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_229_length_108777_cov_10.167829_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738177_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_229_length_108777_cov_10.167829_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537528" accession="ERS11139498">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139498</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537528</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_provirus.579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.5347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_579_length_64655_cov_29.358249_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738177_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_579_length_64655_cov_29.358249_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_provirus.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537529" accession="ERS11139499">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139499</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537529</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_virus.1258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_1258_length_39268_cov_5.747212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_1258_length_39268_cov_5.747212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537530" accession="ERS11139500">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139500</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537530</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_virus.2118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7449339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.37117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_2118_length_26208_cov_4.653438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_2118_length_26208_cov_4.653438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745990_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__UBA1731;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_virus.2118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537531" accession="ERS11139501">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139501</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537531</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_virus.3108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Picovirinae virus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_3108_length_18198_cov_12.204017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_3108_length_18198_cov_12.204017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus_A;s__Coprococcus_A sp900548825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.3141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537532" accession="ERS11139502">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139502</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537532</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738177_virus.616</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738177.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06334385272040957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738177) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_616_length_62408_cov_7.229228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_4_1118__NODE_616_length_62408_cov_7.229228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537534" accession="ERS11139504">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139504</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537534</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.1027</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1027_length_41670_cov_10.440916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1027_length_41670_cov_10.440916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0316936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537535" accession="ERS11139505">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139505</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537535</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.1321</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pseudomonas phage PPAT virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1321_length_34899_cov_10.757434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1321_length_34899_cov_10.757434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0240361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Pseudomonas phage PPAT</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537536" accession="ERS11139506">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139506</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537536</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.1987</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.864977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>222.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1987_length_25649_cov_129.932700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_1987_length_25649_cov_129.932700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537537" accession="ERS11139507">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139507</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537537</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06303277487972782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_397_length_73969_cov_13.424755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_397_length_73969_cov_13.424755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738189_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537538" accession="ERS11139508">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139508</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537538</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_606_length_57816_cov_5.782210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.96875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_606_length_57816_cov_5.782210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537540" accession="ERS11139510">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139510</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537540</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_provirus.219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.7404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_219_length_131503_cov_12.372149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_219_length_131503_cov_12.372149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537541" accession="ERS11139511">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139511</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537541</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_provirus.508</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>225.114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_508_length_84361_cov_127.862690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_508_length_84361_cov_127.862690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_provirus.587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537542" accession="ERS11139512">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139512</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537542</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_provirus.931</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_931_length_58305_cov_9.292626_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_931_length_58305_cov_9.292626_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537543" accession="ERS11139513">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139513</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537543</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_virus.1464</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.902477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_1464_length_40970_cov_8.834153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_1464_length_40970_cov_8.834153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.1418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537544" accession="ERS11139514">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139514</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537544</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_virus.2047</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_2047_length_31085_cov_7.137835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_2047_length_31085_cov_7.137835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-917;g__CAG-475;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_provirus.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537545" accession="ERS11139515">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139515</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537545</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_virus.344</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08777445863796453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>158.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_344_length_105495_cov_89.615768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.972972972972973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_344_length_105495_cov_89.615768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537546" accession="ERS11139516">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139516</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537546</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_virus.610</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.33734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_610_length_74941_cov_5.316494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_610_length_74941_cov_5.316494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537547" accession="ERS11139517">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139517</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537547</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738179_virus.951</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738179.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.29569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738179) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_951_length_57243_cov_4.621488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_7_2482__NODE_951_length_57243_cov_4.621488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_provirus.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537548" accession="ERS11139518">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139518</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537548</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_provirus.397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_397_length_53457_cov_16.636905_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_397_length_53457_cov_16.636905_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537550" accession="ERS11139520">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139520</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537550</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.2667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_150_length_89064_cov_32.490791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_150_length_89064_cov_32.490791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_virus.335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537551" accession="ERS11139521">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139521</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537551</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_258_length_67681_cov_7.310588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5227272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_258_length_67681_cov_7.310588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537552" accession="ERS11139522">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139522</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537552</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.523</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_523_length_44879_cov_7.681845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_523_length_44879_cov_7.681845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UMGS1491;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738180_virus.523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537553" accession="ERS11139523">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139523</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537553</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.6284</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Inovirus sp. virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.24855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_6284_length_6374_cov_5.051929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738180_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_6284_length_6374_cov_5.051929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738180_virus.6284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; Inovirus; unclassified Inovirus; Inovirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537554" accession="ERS11139524">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139524</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537554</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.922</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.8559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_922_length_31034_cov_45.174920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_922_length_31034_cov_45.174920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_provirus.563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537556" accession="ERS11139526">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139526</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537556</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_virus.1013</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1013_length_40891_cov_10.213064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1013_length_40891_cov_10.213064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.1013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537557" accession="ERS11139527">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139527</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537557</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_virus.1257</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.41338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1257_length_35887_cov_4.094387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1257_length_35887_cov_4.094387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.2223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537558" accession="ERS11139528">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139528</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537558</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_virus.1893</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.2544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1893_length_27075_cov_8.124120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_1893_length_27075_cov_8.124120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.1893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537559" accession="ERS11139529">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139529</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537559</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_virus.5637</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01076461450908247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.02635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_5637_length_11460_cov_5.106738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_5637_length_11460_cov_5.106738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900547315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.5637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537560" accession="ERS11139530">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139530</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537560</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_virus.8359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07897769092469892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.2663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_8359_length_8121_cov_17.038662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_8359_length_8121_cov_17.038662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.8359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537561" accession="ERS11139531">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139531</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537561</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_provirus.189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_189_length_124640_cov_17.775070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738182_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_189_length_124640_cov_17.775070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747083_bin.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1994;s__UMGS1994 sp900556975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537562" accession="ERS11139532">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139532</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537562</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_provirus.515</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cellulophaga phage Ingeline_8 provirus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06701541850220269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_515_length_69025_cov_10.522843_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_515_length_69025_cov_10.522843_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cellulophaga phage Ingeline_8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537564" accession="ERS11139534">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139534</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537564</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_virus.1471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_1471_length_33310_cov_5.998255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_1471_length_33310_cov_5.998255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737969_virus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537565" accession="ERS11139535">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139535</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537565</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_virus.3317</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06194933920704848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>235.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_3317_length_15493_cov_135.791651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_3317_length_15493_cov_135.791651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.9369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537566" accession="ERS11139536">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139536</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537566</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_virus.940</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>113.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_940_length_46840_cov_65.840151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_940_length_46840_cov_65.840151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537567" accession="ERS11139537">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139537</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537567</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738183_virus.156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738183.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2717.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738183) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560497) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_24_1936__NODE_156_length_38032_cov_1614.112212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_24_1936__NODE_156_length_38032_cov_1614.112212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737955_virus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537568" accession="ERS11139538">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139538</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537568</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738183_virus.356</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738183.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738183) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560497) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_24_1936__NODE_356_length_18685_cov_22.281384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_24_1936__NODE_356_length_18685_cov_22.281384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738183_virus.356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537570" accession="ERS11139540">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139540</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537570</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_provirus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06798872201629169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_71_length_101110_cov_13.355735_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738184_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_71_length_101110_cov_13.355735_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>uncultured Caudovirales phage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_provirus.763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537571" accession="ERS11139541">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139541</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537571</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_virus.1171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06029648506968258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.65817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_1171_length_26429_cov_4.524021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_1171_length_26429_cov_4.524021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;s__Anaerobutyricum hallii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.1171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537572" accession="ERS11139542">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139542</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537572</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_virus.288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_288_length_55023_cov_16.146180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8867924528301887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_288_length_55023_cov_16.146180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537573" accession="ERS11139543">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139543</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537573</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_virus.439</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_439_length_46121_cov_15.904591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_439_length_46121_cov_15.904591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537574" accession="ERS11139544">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139544</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537574</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_virus.816</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7093893088856106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_816_length_33308_cov_6.571906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_816_length_33308_cov_6.571906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537575" accession="ERS11139545">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139545</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537575</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738185_provirus.1256</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5751674891193073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559765) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_1256_length_37297_cov_49.732348_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_1256_length_37297_cov_49.732348_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745427_provirus.1148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537576" accession="ERS11139546">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139546</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537576</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738185_provirus.669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.0948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559765) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_669_length_58838_cov_37.217032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738185_bin.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_669_length_58838_cov_37.217032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745403_provirus.913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537577" accession="ERS11139547">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139547</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537577</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738185_virus.1254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559765) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_1254_length_37325_cov_9.210025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_1254_length_37325_cov_9.210025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;s__CAG-488 sp000434055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.1320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537579" accession="ERS11139549">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139549</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537579</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738185_virus.714</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559765) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_714_length_55901_cov_46.213528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_714_length_55901_cov_46.213528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745438_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp004553545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_virus.659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537580" accession="ERS11139550">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139550</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537580</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_provirus.316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.3417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_316_length_93449_cov_27.362775_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_316_length_93449_cov_27.362775_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537581" accession="ERS11139551">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139551</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537581</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_provirus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_543_length_69216_cov_48.597680_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738186_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_543_length_69216_cov_48.597680_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_provirus.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537582" accession="ERS11139552">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139552</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537582</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.1045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1045_length_46943_cov_5.722613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738186_bin.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1045_length_46943_cov_5.722613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537583" accession="ERS11139553">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139553</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537583</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.1263</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4629947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.75099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1263_length_41699_cov_4.805512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1263_length_41699_cov_4.805512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0280262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537585" accession="ERS11139555">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139555</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537585</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.9326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_233_length_107321_cov_18.046968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_233_length_107321_cov_18.046968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_virus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537586" accession="ERS11139556">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139556</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537586</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.323</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.824700000000007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_323_length_92481_cov_43.427709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6176470588235294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_323_length_92481_cov_43.427709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537587" accession="ERS11139557">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139557</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537587</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.5495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05443336174904671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_5495_length_13343_cov_11.375320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_5495_length_13343_cov_11.375320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_virus.5495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537588" accession="ERS11139558">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139558</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537588</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.688</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>788.712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_688_length_61468_cov_454.200762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_688_length_61468_cov_454.200762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746436_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537589" accession="ERS11139559">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139559</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537589</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.964</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.5799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_964_length_49517_cov_44.033981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_964_length_49517_cov_44.033981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738192_bin.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp900543455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537590" accession="ERS11139560">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139560</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537590</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738187_provirus.645</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738187.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738187) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_645_length_47607_cov_10.240900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738187_bin.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_645_length_47607_cov_10.240900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746345_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-460;s__CAG-460 sp000437315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_provirus.645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537591" accession="ERS11139561">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139561</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537591</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738187_virus.1354</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738187.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738187) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_1354_length_26974_cov_12.568762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_1354_length_26974_cov_12.568762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737953_virus.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537593" accession="ERS11139563">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139563</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537593</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738187_virus.754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738187.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738187) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_754_length_42484_cov_6.258165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_754_length_42484_cov_6.258165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537594" accession="ERS11139564">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139564</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537594</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738188_provirus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738188.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05743392070484582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.1872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738188) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562249) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_177_length_117489_cov_14.129655_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738188_bin.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_177_length_117489_cov_14.129655_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D succinifaciens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_provirus.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537595" accession="ERS11139565">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139565</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537595</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738188_provirus.593</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn provirus assembled from ERR7738188.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02113157861825958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738188) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562249) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_593_length_54164_cov_8.672472_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_593_length_54164_cov_8.672472_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_provirus.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537596" accession="ERS11139566">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139566</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537596</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738188_virus.1309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738188.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738188) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562249) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_1309_length_30294_cov_14.328524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_1309_length_30294_cov_14.328524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537597" accession="ERS11139567">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139567</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537597</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738188_virus.271</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738188.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.8022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738188) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562249) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_271_length_88905_cov_23.499302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_271_length_88905_cov_23.499302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537599" accession="ERS11139569">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139569</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537599</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>109.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_22_length_248899_cov_66.452351_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738189_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_22_length_248899_cov_66.452351_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537600" accession="ERS11139570">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139570</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537600</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_virus.1000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_1000_length_38687_cov_6.249055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_1000_length_38687_cov_6.249055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_provirus.651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537601" accession="ERS11139571">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139571</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537601</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_virus.1493</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06993392070484582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.41219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_1493_length_29168_cov_5.069712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_1493_length_29168_cov_5.069712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_provirus.121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537602" accession="ERS11139572">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139572</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537602</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_virus.183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.4122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_183_length_96319_cov_25.797417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_183_length_96319_cov_25.797417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_virus.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537603" accession="ERS11139573">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139573</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537603</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_virus.3419</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.3167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_3419_length_14530_cov_29.837404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_3419_length_14530_cov_29.837404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.7067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537604" accession="ERS11139574">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139574</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537604</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738189_virus.564</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738189.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18145698646964648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738189) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_564_length_54832_cov_10.930819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9761904761904762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_3_2503__NODE_564_length_54832_cov_10.930819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537605" accession="ERS11139575">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139575</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537605</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_provirus.1161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.3423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1161_length_42055_cov_5.168993_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738190_bin.165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1161_length_42055_cov_5.168993_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537606" accession="ERS11139576">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139576</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537606</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_provirus.676</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45535389006526233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.4981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_676_length_58996_cov_38.455609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_676_length_58996_cov_38.455609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746779_virus.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537608" accession="ERS11139578">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139578</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537608</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.1279</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.3663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1279_length_39433_cov_21.912313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1279_length_39433_cov_21.912313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-452;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738190_virus.1279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537609" accession="ERS11139579">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139579</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537609</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.1715</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.1443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1715_length_32310_cov_35.819409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1715_length_32310_cov_35.819409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp900552845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_provirus.2122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537610" accession="ERS11139580">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139580</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537610</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.2662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_2662_length_23293_cov_6.725793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_2662_length_23293_cov_6.725793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537611" accession="ERS11139581">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139581</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537611</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.3197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_3197_length_20094_cov_5.721637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738190_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_3197_length_20094_cov_5.721637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738614_bin.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__HGM13634;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.2098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537612" accession="ERS11139582">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139582</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537612</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.548</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_548_length_66731_cov_26.299532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_548_length_66731_cov_26.299532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_virus.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537614" accession="ERS11139584">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139584</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537614</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738191_provirus.168</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738191.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738191) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560289) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_168_length_109637_cov_14.001771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738191_bin.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_168_length_109637_cov_14.001771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Megasphaeraceae;g__Megasphaera;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738191_provirus.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537615" accession="ERS11139585">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139585</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537615</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738191_provirus.719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738191.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738191) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560289) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_719_length_50670_cov_35.251675_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_719_length_50670_cov_35.251675_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0308158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537616" accession="ERS11139586">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139586</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537616</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738191_virus.1133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738191.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06971365638766522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.8056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738191) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560289) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_1133_length_37270_cov_22.365768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_1133_length_37270_cov_22.365768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738191_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537617" accession="ERS11139587">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139587</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537617</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738191_virus.584</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738191.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.40059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738191) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560289) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_584_length_57108_cov_4.166138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_584_length_57108_cov_4.166138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537618" accession="ERS11139588">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139588</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537618</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738191_virus.947</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738191.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.5615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738191) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560289) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_947_length_42366_cov_19.049091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_2_2306__NODE_947_length_42366_cov_19.049091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537619" accession="ERS11139589">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139589</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537619</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_provirus.184</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.7423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_184_length_153339_cov_15.970260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9210526315789472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_184_length_153339_cov_15.970260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537620" accession="ERS11139590">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139590</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537620</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_provirus.68</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.8521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_68_length_239511_cov_20.132291_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738192_bin.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_68_length_239511_cov_20.132291_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME255709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__UMGS1883;f__UMGS1883;g__UMGS1540;s__UMGS1540 sp900552775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738192_provirus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537622" accession="ERS11139592">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139592</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537622</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_virus.1381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.2144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_1381_length_44149_cov_15.200309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_1381_length_44149_cov_15.200309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.1072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537623" accession="ERS11139593">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139593</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537623</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_virus.1748</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_1748_length_36407_cov_15.098541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_1748_length_36407_cov_15.098541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537624" accession="ERS11139594">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139594</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537624</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_virus.2262</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>163.232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_2262_length_29662_cov_95.089032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_2262_length_29662_cov_95.089032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME029635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-417;s__CAG-417 sp000432835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.1296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537625" accession="ERS11139595">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139595</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537625</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_virus.3378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_3378_length_20509_cov_5.772661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_3378_length_20509_cov_5.772661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738192_virus.3378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537626" accession="ERS11139596">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139596</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537626</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_virus.712</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.67269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_712_length_69814_cov_5.467800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_712_length_69814_cov_5.467800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738651_bin.237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA5905;s__UBA5905 sp900763035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537628" accession="ERS11139598">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139598</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537628</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_provirus.740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11976362772252648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_740_length_48954_cov_7.204350_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_740_length_48954_cov_7.204350_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537629" accession="ERS11139599">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139599</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537629</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_virus.1489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_1489_length_29840_cov_12.041058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_1489_length_29840_cov_12.041058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738216_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp900555115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.1720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537630" accession="ERS11139600">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139600</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537630</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_virus.3087</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Beecentumtrevirus virus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_3087_length_16382_cov_8.079301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_3087_length_16382_cov_8.079301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_virus.5962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae; Beecentumtrevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537631" accession="ERS11139601">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139601</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537631</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_virus.518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.1019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_518_length_61914_cov_44.522228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35294117647058826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_518_length_61914_cov_44.522228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_virus.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537632" accession="ERS11139602">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139602</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537632</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_virus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2849118942731278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.2629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_71_length_160186_cov_18.367050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_71_length_160186_cov_18.367050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537633" accession="ERS11139603">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139603</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537633</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_virus.962</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.30731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_962_length_40478_cov_4.922279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_962_length_40478_cov_4.922279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.1446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537634" accession="ERS11139604">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139604</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537634</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_provirus.400</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_400_length_79425_cov_21.787153_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_400_length_79425_cov_21.787153_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME150720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp003526955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537635" accession="ERS11139605">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139605</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537635</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_provirus.730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.9007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_730_length_58768_cov_16.957660_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738194_bin.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_730_length_58768_cov_16.957660_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;s__CAG-488 sp000434055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537637" accession="ERS11139607">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139607</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537637</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.1231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1231_length_41819_cov_8.066096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1231_length_41819_cov_8.066096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.1231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537638" accession="ERS11139608">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139608</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537638</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.1517</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.7565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1517_length_36459_cov_48.597823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1517_length_36459_cov_48.597823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900547315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.1517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537639" accession="ERS11139609">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139609</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537639</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.1706</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.4132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1706_length_33988_cov_33.323376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1706_length_33988_cov_33.323376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.1706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537640" accession="ERS11139610">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139610</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537640</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.2380</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04365360504997764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.99427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_2380_length_26849_cov_4.146571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738194_bin.222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_2380_length_26849_cov_4.146571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.2380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537641" accession="ERS11139611">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139611</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537641</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.3953</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.99867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_3953_length_18238_cov_4.983977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_3953_length_18238_cov_4.983977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.3953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537643" accession="ERS11139613">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139613</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537643</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738195_provirus.229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738195.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12687137493752415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>280.018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738195) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_229_length_97919_cov_155.699005_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_229_length_97919_cov_155.699005_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_virus.1019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537644" accession="ERS11139614">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139614</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537644</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738195_virus.1705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738195.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.2816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738195) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_1705_length_23811_cov_4.521151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738195_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_1705_length_23811_cov_4.521151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537645" accession="ERS11139615">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139615</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537645</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738195_virus.5293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738195.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738195) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_5293_length_8308_cov_9.565059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_5293_length_8308_cov_9.565059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745433_virus.4077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537646" accession="ERS11139616">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139616</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537646</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738195_virus.740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738195.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738195) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_740_length_47505_cov_5.493759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738195_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6046511627906976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_9_1560__NODE_740_length_47505_cov_5.493759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-4429795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537647" accession="ERS11139617">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139617</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537647</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_provirus.17</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>111.397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_17_length_248769_cov_65.440083_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738196_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_17_length_248769_cov_65.440083_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537648" accession="ERS11139618">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139618</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537648</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_virus.1304</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.2128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_1304_length_32666_cov_41.987511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_1304_length_32666_cov_41.987511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746344_bin.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745420_virus.880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537649" accession="ERS11139619">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139619</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537649</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_virus.185</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.3546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_185_length_96319_cov_24.586397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_185_length_96319_cov_24.586397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_virus.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537651" accession="ERS11139621">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139621</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537651</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_virus.561</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08631662166094103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.5251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_561_length_53920_cov_59.888231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_561_length_53920_cov_59.888231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537652" accession="ERS11139622">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139622</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537652</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_virus.822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.94157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_822_length_43792_cov_4.420725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4230769230769231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_822_length_43792_cov_4.420725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-127;s__CAG-127 sp900319515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738196_virus.822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537653" accession="ERS11139623">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139623</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537653</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_provirus.1362</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus provirus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_1362_length_42048_cov_9.764433_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738197_bin.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_1362_length_42048_cov_9.764433_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.1362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537654" accession="ERS11139624">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139624</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537654</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_provirus.489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_489_length_80775_cov_70.747937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_489_length_80775_cov_70.747937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537655" accession="ERS11139625">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139625</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537655</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_virus.1319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>120.667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_1319_length_42696_cov_70.770454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_1319_length_42696_cov_70.770454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.1742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537372" accession="ERS11139342">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139342</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537372</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738147_provirus.479</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738147.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-26T20:28:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738147) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559941) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_479_length_46264_cov_9.405850_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_21_1948__NODE_479_length_46264_cov_9.405850_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537497" accession="ERS11139467">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139467</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537497</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738170_virus.2484</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738170.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738170) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_2484_length_23415_cov_5.849216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_5_2673__NODE_2484_length_23415_cov_5.849216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0296258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537505" accession="ERS11139475">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139475</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537505</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738172_virus.2361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738172.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.48438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738172) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_2361_length_22038_cov_5.301307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_1_2641__NODE_2361_length_22038_cov_5.301307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738172_virus.2361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537511" accession="ERS11139481">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139481</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537511</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738173_virus.917</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738173.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.2109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738173) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_917_length_28298_cov_16.824563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_4_1344__NODE_917_length_28298_cov_16.824563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738246_virus.447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537520" accession="ERS11139490">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139490</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537520</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.1014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_1014_length_40498_cov_12.736350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_1014_length_40498_cov_12.736350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.1014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537526" accession="ERS11139496">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139496</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537526</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738176_virus.8261</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7738176.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.2805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738176) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_8261_length_9929_cov_4.786947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_5_1561__NODE_8261_length_9929_cov_4.786947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_provirus.1115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537533" accession="ERS11139503">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139503</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537533</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_provirus.13</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_13_length_305394_cov_9.780061_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738178_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_13_length_305394_cov_9.780061_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__Zag1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_provirus.240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537539" accession="ERS11139509">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139509</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537539</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738178_virus.841</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738178.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738178) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559934) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_841_length_47256_cov_12.308018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738178_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_23_2501__NODE_841_length_47256_cov_12.308018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738178_virus.841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537549" accession="ERS11139519">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139519</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537549</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738180_virus.113</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738180.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.1781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738180) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_113_length_108902_cov_26.852736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7714285714285715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_17_1555__NODE_113_length_108902_cov_26.852736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537555" accession="ERS11139525">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139525</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537555</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738181_provirus.381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738181.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>198.887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738181) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559867) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_381_length_74804_cov_123.307707_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_11_2484__NODE_381_length_74804_cov_123.307707_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537563" accession="ERS11139533">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139533</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537563</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738182_provirus.95</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7738182.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.8034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738182) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559464) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_95_length_163759_cov_29.377812_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738182_bin.134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48148148148148145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_3_2666__NODE_95_length_163759_cov_29.377812_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537569" accession="ERS11139539">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139539</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537569</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738184_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561120) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_22_length_148309_cov_10.625486_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_7_1597__NODE_22_length_148309_cov_10.625486_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_provirus.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537578" accession="ERS11139548">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139548</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537578</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738185_virus.283</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559765) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_283_length_102807_cov_9.517385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8108108108108109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_16_2361__NODE_283_length_102807_cov_9.517385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME101580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Enterocloster;s__Enterocloster sp000431375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_virus.367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537584" accession="ERS11139554">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139554</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537584</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738186_virus.1545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738186.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.79273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738186) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559485) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1545_length_36185_cov_5.694417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_15_2660__NODE_1545_length_36185_cov_5.694417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537592" accession="ERS11139562">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139562</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537592</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738187_virus.316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738187.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738187) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_316_length_81249_cov_24.379133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_11_1507__NODE_316_length_81249_cov_24.379133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537598" accession="ERS11139568">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139568</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537598</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738188_virus.819</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738188.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1432089863334723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>213.016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738188) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562249) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_819_length_42249_cov_121.121242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_9_1509__NODE_819_length_42249_cov_121.121242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537607" accession="ERS11139577">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139577</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537607</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.1125</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.2894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1125_length_42738_cov_26.983381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_1125_length_42738_cov_26.983381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.1371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537613" accession="ERS11139583">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139583</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537613</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738190_virus.785</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738190.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.814977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738190) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560021) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_785_length_53423_cov_7.109661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_19_2137__NODE_785_length_53423_cov_7.109661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537621" accession="ERS11139591">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139591</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537621</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738192_provirus.891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738192.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738192) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560556) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_891_length_60040_cov_7.559378_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738192_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_22_1935__NODE_891_length_60040_cov_7.559378_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738192_provirus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537627" accession="ERS11139597">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139597</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537627</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738193_provirus.190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738193.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02597739359623315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738193) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561932) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_190_length_109696_cov_17.347504_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738193_bin.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_3_2652__NODE_190_length_109696_cov_17.347504_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__UBA644;s__UBA644 sp900547165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_provirus.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537636" accession="ERS11139606">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139606</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537636</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.1059</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1641249917049939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1059_length_46092_cov_12.644594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9534883720930232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_1059_length_46092_cov_12.644594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME026750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_provirus.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537642" accession="ERS11139612">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139612</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537642</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738194_virus.7493</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738194.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16667301809413704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.47565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738194) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560009) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_7493_length_11013_cov_3.341533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_1_1951__NODE_7493_length_11013_cov_3.341533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.7493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537650" accession="ERS11139620">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139620</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537650</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738196_virus.333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738196.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.8585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738196) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559919) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_333_length_72135_cov_43.864096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_21_2500__NODE_333_length_72135_cov_43.864096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537656" accession="ERS11139626">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139626</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537656</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_virus.17023</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gokushovirus WZ-2015a virus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_17023_length_5072_cov_18.465465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_17023_length_5072_cov_18.465465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_provirus.14699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; unclassified Gokushovirinae; Gokushovirus WZ-2015a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537657" accession="ERS11139627">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139627</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537657</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_virus.29687</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.84221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_29687_length_3192_cov_3.334831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_29687_length_3192_cov_3.334831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_virus.29687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537658" accession="ERS11139628">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139628</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537658</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_virus.480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.9448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_480_length_81486_cov_48.617929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_480_length_81486_cov_48.617929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537659" accession="ERS11139629">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139629</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537659</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738197_virus.950</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738197.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.3755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738197) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559597) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_950_length_53984_cov_12.165674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_1_2682__NODE_950_length_53984_cov_12.165674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_virus.950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537660" accession="ERS11139630">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139630</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537660</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738198_provirus.900</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738198.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738198) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561738) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_900_length_32401_cov_6.039723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_900_length_32401_cov_6.039723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_provirus.900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537661" accession="ERS11139631">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139631</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537661</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738198_virus.157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738198.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.5491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738198) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561738) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_157_length_81288_cov_36.792417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_157_length_81288_cov_36.792417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_virus.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537662" accession="ERS11139632">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139632</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537662</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738198_virus.38</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738198.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07320867667127609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738198) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561738) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_38_length_151525_cov_6.267848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_38_length_151525_cov_6.267848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME260425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492 sp000434335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537663" accession="ERS11139633">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139633</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537663</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738198_virus.54</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738198.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738198) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561738) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_54_length_134137_cov_12.501775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6101694915254238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_54_length_134137_cov_12.501775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537664" accession="ERS11139634">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139634</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537664</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738198_virus.855</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738198.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.76566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738198) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561738) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_855_length_33558_cov_4.349183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_23_1260__NODE_855_length_33558_cov_4.349183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537665" accession="ERS11139635">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139635</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537665</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738199_provirus.453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738199.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.1876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738199) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_453_length_48892_cov_22.764396_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_453_length_48892_cov_22.764396_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537666" accession="ERS11139636">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139636</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537666</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738199_virus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738199.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738199) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_112_length_97209_cov_13.256373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2962962962962963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_112_length_97209_cov_13.256373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738199_virus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537667" accession="ERS11139637">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139637</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537667</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738199_virus.264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738199.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.6756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738199) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_264_length_65253_cov_32.292424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_264_length_65253_cov_32.292424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745874_virus.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537668" accession="ERS11139638">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139638</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537668</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738199_virus.544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738199.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.97805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738199) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560895) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_544_length_44120_cov_5.181232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_9_1548__NODE_544_length_44120_cov_5.181232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745967_virus.496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537669" accession="ERS11139639">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139639</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537669</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_provirus.1</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.0176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_1_length_507761_cov_17.099950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738200_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_1_length_507761_cov_17.099950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_provirus.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537670" accession="ERS11139640">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139640</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537670</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_provirus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.7952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_275_length_82794_cov_42.424302_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_275_length_82794_cov_42.424302_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537671" accession="ERS11139641">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139641</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537671</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_provirus.654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05721365638766521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.2316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_654_length_52794_cov_26.946184_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_654_length_52794_cov_26.946184_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.1137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537672" accession="ERS11139642">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139642</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537672</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_virus.1306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_1306_length_35321_cov_58.344342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_1306_length_35321_cov_58.344342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.1616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537673" accession="ERS11139643">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139643</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537673</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_virus.2177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15495594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_2177_length_25131_cov_7.647282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_2177_length_25131_cov_7.647282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.2254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537674" accession="ERS11139644">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139644</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537674</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738200_virus.637</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738200.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.28698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738200) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560555) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_637_length_53544_cov_5.574654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_22_1638__NODE_637_length_53544_cov_5.574654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537675" accession="ERS11139645">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139645</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537675</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738201_provirus.105</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7738201.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>858.972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738201) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559521) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_105_length_169732_cov_503.747594_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8955223880597015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_105_length_169732_cov_503.747594_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537676" accession="ERS11139646">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139646</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537676</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738201_provirus.371</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738201.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738201) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559521) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_371_length_78379_cov_13.525491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738201_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_371_length_78379_cov_13.525491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__Mediterraneibacter lactaris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737968_provirus.496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537677" accession="ERS11139647">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139647</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537677</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738201_virus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738201.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738201) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559521) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_177_length_125803_cov_9.728656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_177_length_125803_cov_9.728656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745888_virus.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537678" accession="ERS11139648">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139648</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537678</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738201_virus.841</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738201.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.5178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738201) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559521) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_841_length_39070_cov_25.505450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6774193548387096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_21_2679__NODE_841_length_39070_cov_25.505450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0339015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537679" accession="ERS11139649">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139649</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537679</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738202_provirus.368</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738202.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.6967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738202) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560964) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_368_length_48947_cov_16.269634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_368_length_48947_cov_16.269634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537680" accession="ERS11139650">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139650</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537680</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738202_virus.1161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738202.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18368843659633977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738202) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560964) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_1161_length_23298_cov_5.923819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_1161_length_23298_cov_5.923819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537681" accession="ERS11139651">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139651</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537681</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738202_virus.338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738202.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738202) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560964) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_338_length_51319_cov_7.973186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_338_length_51319_cov_7.973186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738202_virus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537682" accession="ERS11139652">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139652</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537682</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738202_virus.697</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738202.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22488986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>108.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738202) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560964) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_697_length_32456_cov_48.300133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_5_1556__NODE_697_length_32456_cov_48.300133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746233_virus.358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537683" accession="ERS11139653">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139653</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537683</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738203_provirus.343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738203.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>126.749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738203) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560247) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_343_length_78247_cov_75.449648_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738203_bin.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_343_length_78247_cov_75.449648_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_provirus.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537684" accession="ERS11139654">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139654</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537684</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738203_virus.1098</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738203.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>118.494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738203) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560247) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_1098_length_38386_cov_69.585476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_1098_length_38386_cov_69.585476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537685" accession="ERS11139655">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139655</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537685</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738203_virus.196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738203.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08777445863796453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738203) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560247) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_196_length_105495_cov_10.505777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.972972972972973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_196_length_105495_cov_10.505777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537686" accession="ERS11139656">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139656</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537686</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738203_virus.4002</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738203.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.5489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738203) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560247) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_4002_length_14106_cov_30.864345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_4002_length_14106_cov_30.864345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__Dorea formicigenerans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.4002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537687" accession="ERS11139657">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139657</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537687</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738203_virus.645</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738203.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738203) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560247) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_645_length_53837_cov_62.035342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_12_2277__NODE_645_length_53837_cov_62.035342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537688" accession="ERS11139658">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139658</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537688</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738204_provirus.169</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738204.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738204) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560453) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_169_length_85678_cov_7.832782_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738204_bin.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_169_length_85678_cov_7.832782_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738260_provirus.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537689" accession="ERS11139659">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139659</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537689</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738204_provirus.62</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738204.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0213092263283177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738204) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560453) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_62_length_123979_cov_10.712797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738204_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_62_length_123979_cov_10.712797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_provirus.230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537690" accession="ERS11139660">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139660</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537690</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738204_virus.131</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738204.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738204) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560453) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_131_length_96442_cov_7.828413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.34615384615384615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_131_length_96442_cov_7.828413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738204_virus.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537691" accession="ERS11139661">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139661</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537691</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738204_virus.338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738204.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.1591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738204) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560453) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_338_length_62662_cov_40.258528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_338_length_62662_cov_40.258528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738182_virus.387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537692" accession="ERS11139662">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139662</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537692</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738204_virus.59</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738204.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01328965839962564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738204) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560453) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_59_length_125014_cov_6.093967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_6_1848__NODE_59_length_125014_cov_6.093967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537693" accession="ERS11139663">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139663</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537693</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_provirus.1655</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.1773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_1655_length_24804_cov_17.572330_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_1655_length_24804_cov_17.572330_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.1581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537694" accession="ERS11139664">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139664</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537694</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_provirus.597</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.130873179909068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_597_length_53170_cov_8.982766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738205_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_597_length_53170_cov_8.982766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537695" accession="ERS11139665">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139665</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537695</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_virus.1333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_1333_length_29297_cov_8.035592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9642857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_1333_length_29297_cov_8.035592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738205_virus.1333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537696" accession="ERS11139666">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139666</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537696</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_virus.230</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr273_1 virus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_230_length_96459_cov_15.048764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7913043478260869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_230_length_96459_cov_15.048764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr273_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537697" accession="ERS11139667">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139667</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537697</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_virus.472</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_472_length_63806_cov_18.677682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_472_length_63806_cov_18.677682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537698" accession="ERS11139668">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139668</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537698</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738205_virus.836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738205.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738205) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562320) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_836_length_41138_cov_7.564307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_21_1526__NODE_836_length_41138_cov_7.564307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537699" accession="ERS11139669">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139669</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537699</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738206_provirus.204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738206.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738206) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_204_length_93947_cov_8.719335_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738206_bin.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.903846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_204_length_93947_cov_8.719335_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537700" accession="ERS11139670">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139670</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537700</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738206_provirus.693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738206.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.9444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738206) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_693_length_47709_cov_49.455492_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_693_length_47709_cov_49.455492_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_provirus.420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537701" accession="ERS11139671">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139671</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537701</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738206_virus.1198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738206.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10914096916299568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.3297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738206) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_1198_length_33826_cov_5.385789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_1198_length_33826_cov_5.385789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0331948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537702" accession="ERS11139672">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139672</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537702</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738206_virus.167</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738206.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738206) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_167_length_100528_cov_7.511294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738206_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5671641791044776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_167_length_100528_cov_7.511294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537703" accession="ERS11139673">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139673</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537703</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738206_virus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738206.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>168.689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738206) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_543_length_55561_cov_110.091342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_20_2353__NODE_543_length_55561_cov_110.091342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537704" accession="ERS11139674">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139674</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537704</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_provirus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.3698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_217_length_90301_cov_47.823373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7931034482758621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_217_length_90301_cov_47.823373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537705" accession="ERS11139675">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139675</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537705</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_provirus.67</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>222.477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_67_length_156420_cov_129.018024_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_67_length_156420_cov_129.018024_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537706" accession="ERS11139676">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139676</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537706</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_virus.1218</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>398.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_1218_length_34575_cov_222.052931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_1218_length_34575_cov_222.052931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME079077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000434935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.2011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537707" accession="ERS11139677">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139677</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537707</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_virus.1827</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_1827_length_25117_cov_12.071086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_1827_length_25117_cov_12.071086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537708" accession="ERS11139678">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139678</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537708</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_virus.329</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_329_length_74051_cov_7.546530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6851851851851852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_329_length_74051_cov_7.546530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Intestinibacter;s__Intestinibacter bartlettii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537709" accession="ERS11139679">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139679</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537709</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738207_virus.899</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738207.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738207) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559675) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_899_length_42016_cov_9.737333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_22_2341__NODE_899_length_42016_cov_9.737333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738207_virus.899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537710" accession="ERS11139680">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139680</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537710</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_provirus.1523</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1523_length_31651_cov_9.379299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738208_bin.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1523_length_31651_cov_9.379299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.1495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537711" accession="ERS11139681">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139681</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537711</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_provirus.64</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1474118942731278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_64_length_157246_cov_18.312835_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738208_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9803921568627452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_64_length_157246_cov_18.312835_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738208_provirus.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537712" accession="ERS11139682">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139682</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537712</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_virus.1446</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1446_length_32709_cov_6.418209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1446_length_32709_cov_6.418209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Senegalimassilia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3424238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537713" accession="ERS11139683">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139683</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537713</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_virus.1778</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1778_length_28463_cov_19.316670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_1778_length_28463_cov_19.316670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537714" accession="ERS11139684">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139684</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537714</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_virus.415</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_415_length_68588_cov_11.172556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_415_length_68588_cov_11.172556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537715" accession="ERS11139685">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139685</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537715</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738208_virus.932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738208.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738208) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561203) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_932_length_41986_cov_28.238087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_12_1619__NODE_932_length_41986_cov_28.238087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746721_virus.430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537716" accession="ERS11139686">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139686</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537716</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_provirus.1377</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae provirus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.12072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_1377_length_47774_cov_5.300501_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_1377_length_47774_cov_5.300501_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_virus.602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537717" accession="ERS11139687">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139687</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537717</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_provirus.623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.7712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_623_length_86156_cov_37.223492_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738209_bin.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_623_length_86156_cov_37.223492_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537718" accession="ERS11139688">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139688</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537718</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_virus.1223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>124.318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_1223_length_53119_cov_70.731477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6041666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_1223_length_53119_cov_70.731477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738209_virus.1223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537719" accession="ERS11139689">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139689</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537719</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_virus.2084</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.9438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_2084_length_32066_cov_38.951390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_2084_length_32066_cov_38.951390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738209_virus.2084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537720" accession="ERS11139690">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139690</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537720</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_virus.3545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>138.883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_3545_length_18254_cov_79.818837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_3545_length_18254_cov_79.818837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_virus.2030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537721" accession="ERS11139691">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139691</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537721</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738209_virus.751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738209.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.0548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738209) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560053) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_751_length_76468_cov_27.710084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8648648648648649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_2_2152__NODE_751_length_76468_cov_27.710084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737972_virus.307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537722" accession="ERS11139692">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139692</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537722</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738210_provirus.283</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738210.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.4093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738210) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560447) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_283_length_73848_cov_38.565900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738210_bin.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_283_length_73848_cov_38.565900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537723" accession="ERS11139693">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139693</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537723</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738210_provirus.87</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738210.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.6376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738210) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560447) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_87_length_122087_cov_35.740775_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5588235294117647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_87_length_122087_cov_35.740775_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537724" accession="ERS11139694">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139694</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537724</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738210_virus.1426</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738210.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738210) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560447) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_1426_length_25639_cov_7.132853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_1426_length_25639_cov_7.132853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738210_virus.1426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537725" accession="ERS11139695">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139695</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537725</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738210_virus.514</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738210.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.8559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738210) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560447) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_514_length_52443_cov_15.930298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738210_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_12_1851__NODE_514_length_52443_cov_15.930298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537726" accession="ERS11139696">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139696</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537726</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738211_provirus.1254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738211.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.8304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738211) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_1254_length_29216_cov_23.738769_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_1254_length_29216_cov_23.738769_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.1468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537727" accession="ERS11139697">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139697</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537727</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738211_provirus.75</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738211.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2060434728913873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>308.889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738211) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_75_length_124500_cov_263.356526_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_75_length_124500_cov_263.356526_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537728" accession="ERS11139698">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139698</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537728</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738211_virus.25</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738211.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738211) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_25_length_189795_cov_9.923286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_25_length_189795_cov_9.923286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537729" accession="ERS11139699">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139699</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537729</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738211_virus.644</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738211.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738211) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559804) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_644_length_47045_cov_9.104305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_21_2445__NODE_644_length_47045_cov_9.104305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537730" accession="ERS11139700">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139700</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537730</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_provirus.10</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_10_length_367401_cov_33.805834_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738212_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_10_length_367401_cov_33.805834_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira sp900545725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537731" accession="ERS11139701">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139701</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537731</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_provirus.233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.8989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_233_length_102099_cov_82.089785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5862068965517241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_233_length_102099_cov_82.089785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537732" accession="ERS11139702">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139702</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537732</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_provirus.649</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_649_length_61041_cov_7.466603_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>42.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_649_length_61041_cov_7.466603_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537733" accession="ERS11139703">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139703</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537733</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_virus.1221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_1221_length_41156_cov_10.597775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_1221_length_41156_cov_10.597775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_virus.1221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537734" accession="ERS11139704">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139704</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537734</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_virus.1630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_1630_length_33415_cov_9.282710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738212_bin.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_1630_length_33415_cov_9.282710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537735" accession="ERS11139705">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139705</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537735</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_virus.2086</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.60892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_2086_length_27968_cov_4.036284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738212_bin.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_2086_length_27968_cov_4.036284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME254156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-582;s__CAG-582 sp000435515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_virus.2086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537736" accession="ERS11139706">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139706</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537736</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_virus.239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25495594713656383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.33836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_239_length_101285_cov_5.336436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_239_length_101285_cov_5.336436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_virus.150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537737" accession="ERS11139707">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139707</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537737</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738212_virus.663</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738212.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.1092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738212) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559763) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_663_length_60238_cov_21.196622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9428571428571428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_10_2358__NODE_663_length_60238_cov_21.196622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp900544615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537738" accession="ERS11139708">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139708</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537738</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_provirus.1202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1202_length_45576_cov_6.658806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1202_length_45576_cov_6.658806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745397_provirus.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537739" accession="ERS11139709">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139709</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537739</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_provirus.278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.7189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_278_length_107566_cov_45.070603_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738213_bin.135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_278_length_107566_cov_45.070603_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0296258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537740" accession="ERS11139710">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139710</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537740</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_provirus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_71_length_213615_cov_9.913383_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738213_bin.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_71_length_213615_cov_9.913383_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537741" accession="ERS11139711">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139711</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537741</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.1113</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.46637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1113_length_48041_cov_4.807043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1113_length_48041_cov_4.807043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537742" accession="ERS11139712">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139712</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537742</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.1393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1393_length_41489_cov_9.026007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1393_length_41489_cov_9.026007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537743" accession="ERS11139713">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139713</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537743</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.1729</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1729_length_35331_cov_7.300051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_1729_length_35331_cov_7.300051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME222400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA1227;s__UBA1227 sp900545655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_virus.1729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537744" accession="ERS11139714">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139714</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537744</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.2248</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phietavirus virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_2248_length_28739_cov_7.637290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_2248_length_28739_cov_7.637290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_virus.2248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Azeredovirinae; Phietavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537745" accession="ERS11139715">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139715</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537745</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.3363</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.67049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_3363_length_20301_cov_5.631972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_3363_length_20301_cov_5.631972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537746" accession="ERS11139716">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139716</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537746</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738213_virus.982</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738213.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.2167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738213) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561379) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_982_length_51996_cov_35.131994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_13_1506__NODE_982_length_51996_cov_35.131994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.1048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537747" accession="ERS11139717">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139717</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537747</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_provirus.596</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08468667725817124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.6087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_596_length_49475_cov_19.843313_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738214_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_596_length_49475_cov_19.843313_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537748" accession="ERS11139718">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139718</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537748</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.1157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>96.1432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_1157_length_31088_cov_61.196962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_1157_length_31088_cov_61.196962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME099131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000435075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.2151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537749" accession="ERS11139719">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139719</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537749</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2830.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_157_length_106893_cov_1698.004419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.954954954954955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_157_length_106893_cov_1698.004419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537750" accession="ERS11139720">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139720</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537750</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.216</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1658.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_216_length_91873_cov_1005.649375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4888888888888889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_216_length_91873_cov_1005.649375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537751" accession="ERS11139721">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139721</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537751</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.2606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_2606_length_17170_cov_12.708645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_2606_length_17170_cov_12.708645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.2606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537752" accession="ERS11139722">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139722</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537752</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.3431</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03986784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_3431_length_13693_cov_6.115977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_3431_length_13693_cov_6.115977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.3431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537753" accession="ERS11139723">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139723</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537753</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.477</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_477_length_57213_cov_6.673393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_477_length_57213_cov_6.673393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-724;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537754" accession="ERS11139724">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139724</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537754</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_653_length_47213_cov_8.213531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_653_length_47213_cov_8.213531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537755" accession="ERS11139725">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139725</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537755</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Braunvirinae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_730_length_43799_cov_12.139312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6612903225806451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_730_length_43799_cov_12.139312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UBA3789;s__UBA3789 sp900543055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Drexlerviridae; Braunvirinae; unclassified Braunvirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537756" accession="ERS11139726">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139726</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537756</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.809</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>264.724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_809_length_41186_cov_160.454426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_809_length_41186_cov_160.454426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.1502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537757" accession="ERS11139727">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139727</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537757</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738214_virus.929</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738214.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3799559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738214) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560007) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_929_length_36798_cov_6.739876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_11_1999__NODE_929_length_36798_cov_6.739876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.1480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537758" accession="ERS11139728">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139728</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537758</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_provirus.191</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.4291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_191_length_113756_cov_12.688227_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738215_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_191_length_113756_cov_12.688227_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738227_bin.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp004557855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738215_provirus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537759" accession="ERS11139729">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139729</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537759</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_provirus.653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.47383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_653_length_58333_cov_4.670437_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738215_bin.187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_653_length_58333_cov_4.670437_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_provirus.4_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537760" accession="ERS11139730">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139730</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537760</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.1150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bdellovibrio phage phi1422 virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.9552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_1150_length_40983_cov_36.766049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_1150_length_40983_cov_36.766049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Bdellovibrio phage phi1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537761" accession="ERS11139731">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139731</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537761</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.1479</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7573237885462554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.4647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_1479_length_34997_cov_11.690092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_1479_length_34997_cov_11.690092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537762" accession="ERS11139732">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139732</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537762</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.2080</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_2080_length_27438_cov_8.758598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_2080_length_27438_cov_8.758598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738215_virus.2080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537763" accession="ERS11139733">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139733</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537763</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.2737</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.0491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_2737_length_22454_cov_22.705054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_2737_length_22454_cov_22.705054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_provirus.2080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537764" accession="ERS11139734">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139734</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537764</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.17249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_550_length_64358_cov_4.654361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48148148148148145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_550_length_64358_cov_4.654361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738215_virus.550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537765" accession="ERS11139735">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139735</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537765</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.697</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>90.4241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_697_length_56266_cov_50.974443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_697_length_56266_cov_50.974443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738215_virus.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537766" accession="ERS11139736">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139736</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537766</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738215_virus.924</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738215.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738215) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561326) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_924_length_47288_cov_10.214187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_23_1309__NODE_924_length_47288_cov_10.214187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537767" accession="ERS11139737">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139737</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537767</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_provirus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09849942002795124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_112_length_167225_cov_8.193828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738216_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_112_length_167225_cov_8.193828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738216_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1242;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738216_provirus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537768" accession="ERS11139738">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139738</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537768</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_provirus.268</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0322136563876652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.9345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_268_length_121068_cov_49.286468_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738216_bin.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_268_length_121068_cov_49.286468_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738216_provirus.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537769" accession="ERS11139739">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139739</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537769</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_provirus.684</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03986784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.3683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_684_length_70316_cov_24.025627_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738216_bin.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_684_length_70316_cov_24.025627_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__CAG-269 sp001916005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738216_provirus.684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537770" accession="ERS11139740">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139740</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537770</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_virus.1083</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00505093710516759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>101.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_1083_length_53347_cov_60.109499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_1083_length_53347_cov_60.109499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537771" accession="ERS11139741">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139741</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537771</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_virus.169</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.2982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_169_length_146589_cov_23.530489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6981132075471698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_169_length_146589_cov_23.530489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.887873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537772" accession="ERS11139742">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139742</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537772</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_virus.2187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3673458149779736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_2187_length_31039_cov_9.066178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_2187_length_31039_cov_9.066178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D succinifaciens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537773" accession="ERS11139743">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139743</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537773</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_virus.4002</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06798872201629169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.8811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_4002_length_18414_cov_56.033484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_4002_length_18414_cov_56.033484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738216_virus.4002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537774" accession="ERS11139744">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139744</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537774</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738216_virus.91</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738216.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1424559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738216) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559673) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_91_length_181664_cov_21.462566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738216_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5681818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_16_2338__NODE_91_length_181664_cov_21.462566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738216_virus.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537775" accession="ERS11139745">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139745</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537775</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738217_provirus.486</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_486_length_61862_cov_9.732055_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_486_length_61862_cov_9.732055_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738217_provirus.486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537776" accession="ERS11139746">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139746</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537776</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738217_virus.1158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_1158_length_35319_cov_8.200159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_1158_length_35319_cov_8.200159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738217_virus.1158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537777" accession="ERS11139747">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139747</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537777</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738217_virus.1451</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_1451_length_30061_cov_6.436233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_1451_length_30061_cov_6.436233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537778" accession="ERS11139748">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139748</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537778</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738217_virus.2552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_2552_length_19880_cov_7.096147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_2552_length_19880_cov_7.096147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.2492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537779" accession="ERS11139749">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139749</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537779</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738217_virus.749</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17129856875493005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.15339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_749_length_46614_cov_4.984700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_H_A_11_2177__NODE_749_length_46614_cov_4.984700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-884;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537780" accession="ERS11139750">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139750</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537780</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_provirus.151</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12085444902106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_151_length_98797_cov_10.954021_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738218_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_151_length_98797_cov_10.954021_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738595_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_provirus.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537781" accession="ERS11139751">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139751</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537781</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_provirus.93</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24245594713656385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_93_length_123700_cov_7.363411_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9295774647887324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_93_length_123700_cov_7.363411_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_provirus.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537782" accession="ERS11139752">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139752</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537782</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_virus.1283</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_1283_length_28067_cov_6.239728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738218_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_1283_length_28067_cov_6.239728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_virus.1283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537783" accession="ERS11139753">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139753</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537783</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_virus.2619</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_2619_length_15756_cov_6.463715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_2619_length_15756_cov_6.463715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_virus.2619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537784" accession="ERS11139754">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139754</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537784</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_virus.432</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>126.323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_432_length_58693_cov_68.560035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_432_length_58693_cov_68.560035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537785" accession="ERS11139755">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139755</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537785</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_virus.714</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_714_length_42926_cov_10.770310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_714_length_42926_cov_10.770310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_virus.714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537786" accession="ERS11139756">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139756</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537786</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738218_virus.935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738218.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738218) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561078) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_935_length_35715_cov_6.984399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_1_1575__NODE_935_length_35715_cov_6.984399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.2577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537787" accession="ERS11139757">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139757</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537787</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_provirus.208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.1503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_208_length_85237_cov_30.147839_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738219_bin.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_208_length_85237_cov_30.147839_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_provirus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537788" accession="ERS11139758">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139758</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537788</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_provirus.572</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.2942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_572_length_47091_cov_43.551304_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738219_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_572_length_47091_cov_43.551304_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738228_provirus.378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537789" accession="ERS11139759">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139759</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537789</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_virus.1826</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2693548984968848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_1826_length_22838_cov_7.139976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738219_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_1826_length_22838_cov_7.139976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_virus.1846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537790" accession="ERS11139760">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139760</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537790</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_virus.4087</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_4087_length_13317_cov_104.536254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_4087_length_13317_cov_104.536254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.4087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537791" accession="ERS11139761">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139761</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537791</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_virus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_685_length_42779_cov_12.465365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_685_length_42779_cov_12.465365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0316936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537792" accession="ERS11139762">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139762</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537792</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738219_virus.886</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561625) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_886_length_36733_cov_17.011703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_14_1184__NODE_886_length_36733_cov_17.011703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.1558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537793" accession="ERS11139763">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139763</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537793</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_provirus.318</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.2642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_318_length_67879_cov_38.061606_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738221_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_318_length_67879_cov_38.061606_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_provirus.106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537794" accession="ERS11139764">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139764</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537794</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_provirus.65</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage provirus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_65_length_136577_cov_8.420374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738221_bin.126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_65_length_136577_cov_8.420374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_virus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537795" accession="ERS11139765">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139765</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537795</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_virus.133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0680097705694487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.5287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_133_length_100444_cov_30.890382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_133_length_100444_cov_30.890382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537796" accession="ERS11139766">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139766</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537796</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_virus.2231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.22957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_2231_length_18027_cov_3.850919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_2231_length_18027_cov_3.850919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.2231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537797" accession="ERS11139767">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139767</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537797</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_virus.528</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.2385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_528_length_51747_cov_19.049758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_528_length_51747_cov_19.049758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_provirus.890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537798" accession="ERS11139768">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139768</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537798</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_virus.753</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_753_length_41629_cov_7.733707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42105263157894735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_753_length_41629_cov_7.733707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.1341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537799" accession="ERS11139769">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139769</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537799</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738221_virus.914</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738221.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1074339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738221) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_914_length_36773_cov_20.821479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738221_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_19_1307__NODE_914_length_36773_cov_20.821479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537800" accession="ERS11139770">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139770</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537800</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_provirus.456</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.5975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_456_length_67045_cov_18.966596_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738222_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_456_length_67045_cov_18.966596_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_provirus.617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537801" accession="ERS11139771">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139771</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537801</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_virus.1175</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_1175_length_35270_cov_7.492626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_1175_length_35270_cov_7.492626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537802" accession="ERS11139772">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139772</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537802</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_virus.1862</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25781851150140056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_1862_length_25926_cov_5.688731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_1862_length_25926_cov_5.688731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537803" accession="ERS11139773">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139773</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537803</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_virus.2550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.88742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_2550_length_21104_cov_4.391972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_2550_length_21104_cov_4.391972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_provirus.1362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537804" accession="ERS11139774">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139774</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537804</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_virus.571</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_571_length_58446_cov_5.953914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738222_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8787878787878788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_571_length_58446_cov_5.953914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_C</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537805" accession="ERS11139775">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139775</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537805</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738222_virus.8021</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738222.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.6939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738222) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561039) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_8021_length_9062_cov_4.145019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_2_1565__NODE_8021_length_9062_cov_4.145019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_virus.6778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537806" accession="ERS11139776">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139776</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537806</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_provirus.14466</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13558963045632055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.76893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_14466_length_6503_cov_9.445534_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_14466_length_6503_cov_9.445534_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME276106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900548615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_provirus.14466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537807" accession="ERS11139777">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139777</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537807</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_provirus.535</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_535_length_66803_cov_7.716048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_535_length_66803_cov_7.716048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;s__CAG-605 sp000433475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_provirus.535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537808" accession="ERS11139778">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139778</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537808</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_virus.1051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.5145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_1051_length_45417_cov_5.455712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_1051_length_45417_cov_5.455712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_virus.1051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537809" accession="ERS11139779">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139779</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537809</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_virus.1583</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.9266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_1583_length_35123_cov_5.025253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_1583_length_35123_cov_5.025253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.2319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537810" accession="ERS11139780">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139780</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537810</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_virus.234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>105.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_234_length_98991_cov_63.832339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6170212765957447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_234_length_98991_cov_63.832339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_virus.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537811" accession="ERS11139781">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139781</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537811</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_virus.3085</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_3085_length_21969_cov_7.598438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_3085_length_21969_cov_7.598438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.4695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537812" accession="ERS11139782">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139782</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537812</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738223_virus.720</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738223.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.4128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738223) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560427) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_720_length_56917_cov_25.295461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_8_1817__NODE_720_length_56917_cov_25.295461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537813" accession="ERS11139783">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139783</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537813</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_provirus.2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>200.345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_2_length_823501_cov_114.465922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738224_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_2_length_823501_cov_114.465922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_provirus.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537814" accession="ERS11139784">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139784</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537814</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_provirus.692</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_692_length_58602_cov_7.484699_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.532258064516129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_692_length_58602_cov_7.484699_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_provirus.692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537815" accession="ERS11139785">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139785</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537815</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.1092</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1092_length_43188_cov_9.633133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.696969696969697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1092_length_43188_cov_9.633133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.1092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537816" accession="ERS11139786">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139786</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537816</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.1415</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.3226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1415_length_35280_cov_15.791836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1415_length_35280_cov_15.791836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537817" accession="ERS11139787">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139787</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537817</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.1980</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.36641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1980_length_27447_cov_4.532737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_1980_length_27447_cov_4.532737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537818" accession="ERS11139788">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139788</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537818</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.2593</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17129856875493005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.19774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_2593_length_21779_cov_4.498341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_2593_length_21779_cov_4.498341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.2249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537819" accession="ERS11139789">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139789</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537819</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.511</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_511_length_72925_cov_20.684315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8372093023255814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_511_length_72925_cov_20.684315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-465;g__CAG-465;s__CAG-465 sp000433755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537820" accession="ERS11139790">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139790</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537820</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738224_virus.812</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Klebsiella phage P-K7R virus assembled from ERR7738224.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.0878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738224) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560526) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_812_length_52543_cov_19.677944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7804878048780488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_20_1921__NODE_812_length_52543_cov_19.677944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Klebsiella phage P-K7R</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537821" accession="ERS11139791">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139791</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537821</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738225_provirus.321</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738225.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738225) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559286) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_321_length_65221_cov_61.655425_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_321_length_65221_cov_61.655425_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738225_provirus.321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537822" accession="ERS11139792">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139792</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537822</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738225_virus.120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738225.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.3035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738225) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559286) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_120_length_104417_cov_13.885327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_120_length_104417_cov_13.885327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738225_virus.120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537823" accession="ERS11139793">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139793</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537823</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738225_virus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738225.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04793421803331155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>358.923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738225) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559286) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_22_length_181530_cov_202.432195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5192307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_22_length_181530_cov_202.432195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537824" accession="ERS11139794">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139794</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537824</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738225_virus.637</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738225.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738225) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559286) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_637_length_44760_cov_18.146051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_637_length_44760_cov_18.146051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738225_virus.637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537825" accession="ERS11139795">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139795</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537825</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738225_virus.919</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738225.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.7493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738225) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559286) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_919_length_35410_cov_17.233917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_C_20_2577__NODE_919_length_35410_cov_17.233917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME097668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738225_virus.919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537826" accession="ERS11139796">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139796</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537826</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738226_provirus.77</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738226.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.777477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738226) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561658) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_15_1254__NODE_77_length_138077_cov_12.614543_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738226_bin.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_15_1254__NODE_77_length_138077_cov_12.614543_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides thetaiotaomicron</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738175_provirus.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537827" accession="ERS11139797">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139797</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537827</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_provirus.292</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00743392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_292_length_76677_cov_9.602467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738227_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_292_length_76677_cov_9.602467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0319855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537828" accession="ERS11139798">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139798</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537828</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_provirus.67</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>263.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_67_length_161564_cov_161.874714_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_67_length_161564_cov_161.874714_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_provirus.431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537829" accession="ERS11139799">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139799</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537829</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.1325</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.69315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_1325_length_30715_cov_4.979241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_1325_length_30715_cov_4.979241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME248826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_D</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_provirus.448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537830" accession="ERS11139800">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139800</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537830</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.1823</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00863157861825958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_1823_length_24717_cov_8.113068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_1823_length_24717_cov_8.113068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537831" accession="ERS11139801">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139801</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537831</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.2932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_2932_length_17383_cov_22.479140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_2932_length_17383_cov_22.479140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.4069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537832" accession="ERS11139802">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139802</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537832</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.501</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.9281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_501_length_57344_cov_56.630328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_501_length_57344_cov_56.630328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.1262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537833" accession="ERS11139803">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139803</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537833</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.709</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.3845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_709_length_46051_cov_34.659177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_709_length_46051_cov_34.659177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_virus.630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537834" accession="ERS11139804">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139804</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537834</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738227_virus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738227.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738227) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560982) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_99_length_130676_cov_5.909563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_3_1562__NODE_99_length_130676_cov_5.909563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;s__CAG-302 sp001916775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537835" accession="ERS11139805">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139805</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537835</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_provirus.378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.7128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_378_length_49176_cov_14.278336_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8709677419354839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_378_length_49176_cov_14.278336_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738228_provirus.378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537836" accession="ERS11139806">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139806</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537836</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_virus.1433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_1433_length_23066_cov_6.366219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_1433_length_23066_cov_6.366219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.2064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537837" accession="ERS11139807">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139807</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537837</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_virus.1865</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18564382477710456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_1865_length_19292_cov_7.516289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_1865_length_19292_cov_7.516289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737619_virus.813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537838" accession="ERS11139808">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139808</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537838</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_virus.343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_343_length_51612_cov_10.759581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5526315789473685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_343_length_51612_cov_10.759581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900768995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738228_virus.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537839" accession="ERS11139809">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139809</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537839</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_virus.586</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.4505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_586_length_38862_cov_21.733222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738228_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_586_length_38862_cov_21.733222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738228_virus.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537840" accession="ERS11139810">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139810</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537840</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738228_virus.746</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738228.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.3045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738228) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561052) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_746_length_34276_cov_26.015264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_23_1564__NODE_746_length_34276_cov_26.015264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537841" accession="ERS11139811">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139811</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537841</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738229_provirus.17</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738229.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738229) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567094) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_17_length_218050_cov_14.654544_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738229_bin.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6904761904761905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_17_length_218050_cov_14.654544_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738229_provirus.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537842" accession="ERS11139812">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139812</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537842</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738229_virus.1182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738229.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.9729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738229) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567094) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_1182_length_28264_cov_43.322950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_1182_length_28264_cov_43.322950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537843" accession="ERS11139813">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139813</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537843</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738229_virus.253</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738229.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>147.192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738229) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567094) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_253_length_76201_cov_85.863210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.26666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_253_length_76201_cov_85.863210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738229_virus.253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537844" accession="ERS11139814">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139814</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537844</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738229_virus.6304</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7738229.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738229) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567094) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_6304_length_6393_cov_8.823939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_6304_length_6393_cov_8.823939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738229_virus.6304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537845" accession="ERS11139815">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139815</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537845</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738229_virus.978</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738229.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>103.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738229) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567094) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_978_length_32862_cov_62.836328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_11_CHE0009BZ__NODE_978_length_32862_cov_62.836328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_provirus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537846" accession="ERS11139816">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139816</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537846</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_provirus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14697136563876662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.6614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_351_length_91022_cov_33.720084_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_351_length_91022_cov_33.720084_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_provirus.872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537847" accession="ERS11139817">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139817</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537847</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_provirus.96</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.3616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_96_length_173901_cov_27.375282_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738230_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_96_length_173901_cov_27.375282_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738230_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Fibrobacterota;c__Fibrobacteria;o__Fibrobacterales;f__Fibrobacteraceae;g__Fibrobacter_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_provirus.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537848" accession="ERS11139818">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139818</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537848</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.1190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.7333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_1190_length_40122_cov_38.121463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_1190_length_40122_cov_38.121463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.1823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537849" accession="ERS11139819">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139819</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537849</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.1393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_1393_length_35871_cov_13.541404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_1393_length_35871_cov_13.541404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738274_bin.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Lentisphaeria;o__UBA1407;f__UBA1407;g__UBA1724;s__UBA1724 sp900548225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.1393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537850" accession="ERS11139820">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139820</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537850</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.172</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_172_length_133202_cov_16.675854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_172_length_133202_cov_16.675854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738580_bin.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS1668;s__UMGS1668 sp900553955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537851" accession="ERS11139821">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139821</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537851</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.2168</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1424559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_2168_length_25682_cov_10.730795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_2168_length_25682_cov_10.730795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.2254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537852" accession="ERS11139822">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139822</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537852</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.27</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.1908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_27_length_276188_cov_28.168860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.746268656716418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_27_length_276188_cov_28.168860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745397_virus.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537854" accession="ERS11139824">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139824</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537854</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.692</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.8318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_692_length_58489_cov_30.540300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_692_length_58489_cov_30.540300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537855" accession="ERS11139825">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139825</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537855</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.858</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.32204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_858_length_50607_cov_4.551079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.34615384615384615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_858_length_50607_cov_4.551079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537856" accession="ERS11139826">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139826</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537856</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_provirus.119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18997797356828192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>832.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_119_length_154447_cov_490.801173_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738231_bin.10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_119_length_154447_cov_490.801173_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537857" accession="ERS11139827">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139827</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537857</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_provirus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1248898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.4798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_685_length_64838_cov_33.104337_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738231_bin.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_685_length_64838_cov_33.104337_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_provirus.685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537858" accession="ERS11139828">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139828</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537858</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_virus.1219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_1219_length_45258_cov_11.574866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_1219_length_45258_cov_11.574866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.1948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537859" accession="ERS11139829">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139829</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537859</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_virus.17462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Chlamydiamicrovirus virus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_17462_length_5052_cov_8.104322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_17462_length_5052_cov_8.104322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738231_virus.17462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; Chlamydiamicrovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537860" accession="ERS11139830">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139830</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537860</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_virus.316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_316_length_96312_cov_63.198098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_316_length_96312_cov_63.198098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537861" accession="ERS11139831">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139831</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537861</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738231_virus.7124</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738231.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06078698503890495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.1405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738231) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559570) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_7124_length_11217_cov_29.490215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_23_2677__NODE_7124_length_11217_cov_29.490215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME181333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900540885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738231_virus.7124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537862" accession="ERS11139832">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139832</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537862</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_provirus.124</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae provirus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.6259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_124_length_152824_cov_18.796605_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738232_bin.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_124_length_152824_cov_18.796605_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_provirus.435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537863" accession="ERS11139833">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139833</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537863</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_provirus.246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.1377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_246_length_109757_cov_21.597930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738232_bin.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_246_length_109757_cov_21.597930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_provirus.246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537864" accession="ERS11139834">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139834</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537864</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_provirus.474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.6397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_474_length_76218_cov_34.741506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738232_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>41.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_474_length_76218_cov_34.741506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_provirus.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537865" accession="ERS11139835">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139835</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537865</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_provirus.897</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_897_length_51050_cov_10.196280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5675675675675675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_897_length_51050_cov_10.196280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_provirus.897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537866" accession="ERS11139836">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139836</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537866</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_virus.1460</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_1460_length_36358_cov_12.164494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7317073170731707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_1460_length_36358_cov_12.164494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738260_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactiplantibacillus;s__Lactiplantibacillus plantarum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_provirus.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537867" accession="ERS11139837">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139837</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537867</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_virus.2853</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lactobacillus phage Sha1 virus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07489303048134302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.8081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_2853_length_20537_cov_20.131623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_2853_length_20537_cov_20.131623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738260_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactiplantibacillus;s__Lactiplantibacillus plantarum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_provirus.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lactobacillus phage Sha1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537868" accession="ERS11139838">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139838</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537868</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738232_virus.436</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738232.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738232) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560045) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_436_length_79977_cov_35.824856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6585365853658537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_23_2151__NODE_436_length_79977_cov_35.824856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_virus.436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537869" accession="ERS11139839">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139839</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537869</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_provirus.1037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseobacter phage CRP-7 provirus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6873898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.0336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_1037_length_41050_cov_54.623752_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_1037_length_41050_cov_54.623752_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Roseobacter phage CRP-7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537870" accession="ERS11139840">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139840</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537870</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_provirus.34</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_34_length_242012_cov_13.079542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_34_length_242012_cov_13.079542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537871" accession="ERS11139841">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139841</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537871</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_provirus.731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22703565487607813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_731_length_51165_cov_27.420725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_731_length_51165_cov_27.420725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196 sp900553895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.3054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537872" accession="ERS11139842">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139842</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537872</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_virus.1289</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10914096916299568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_1289_length_35470_cov_10.096658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_1289_length_35470_cov_10.096658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0331948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537873" accession="ERS11139843">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139843</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537873</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_virus.211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.4945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_211_length_104148_cov_17.893630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9078947368421052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_211_length_104148_cov_17.893630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537875" accession="ERS11139845">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139845</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537875</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_virus.734</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18068787764054067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_734_length_51040_cov_7.219316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_734_length_51040_cov_7.219316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537876" accession="ERS11139846">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139846</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537876</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_provirus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_177_length_131653_cov_14.515649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738234_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9772727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_177_length_131653_cov_14.515649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_provirus.177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537877" accession="ERS11139847">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139847</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537877</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_provirus.719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_719_length_56732_cov_58.543712_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_719_length_56732_cov_58.543712_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_virus.1930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537878" accession="ERS11139848">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139848</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537878</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.1193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.9687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_1193_length_40024_cov_4.431747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738234_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_1193_length_40024_cov_4.431747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537879" accession="ERS11139849">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139849</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537879</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.1659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07692912905635668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.5383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_1659_length_32015_cov_23.217014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738234_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_1659_length_32015_cov_23.217014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.2107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537880" accession="ERS11139850">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139850</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537880</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.2306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_2306_length_24642_cov_8.181111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_2306_length_24642_cov_8.181111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745434_virus.863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537881" accession="ERS11139851">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139851</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537881</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.3399</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_3399_length_17848_cov_6.129762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_3399_length_17848_cov_6.129762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.6060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537883" accession="ERS11139853">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139853</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537883</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_849_length_50300_cov_11.893993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_849_length_50300_cov_11.893993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537884" accession="ERS11139854">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139854</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537884</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_provirus.1287</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1287_length_39994_cov_6.115916_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1287_length_39994_cov_6.115916_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.2451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537885" accession="ERS11139855">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139855</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537885</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_provirus.322</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_322_length_90806_cov_10.591531_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_322_length_90806_cov_10.591531_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_provirus.322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537886" accession="ERS11139856">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139856</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537886</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_provirus.81</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_81_length_165907_cov_6.524706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738235_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6984126984126984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_81_length_165907_cov_6.524706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME130774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900545585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_provirus.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537887" accession="ERS11139857">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139857</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537887</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_virus.1135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.95043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1135_length_43196_cov_5.117535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6774193548387096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1135_length_43196_cov_5.117535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738210_provirus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537889" accession="ERS11139859">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139859</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537889</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_virus.2099</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_2099_length_27804_cov_16.870740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_2099_length_27804_cov_16.870740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537890" accession="ERS11139860">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139860</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537890</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_virus.518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_518_length_67784_cov_7.376963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_518_length_67784_cov_7.376963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738238_provirus.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537891" accession="ERS11139861">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139861</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537891</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_virus.718</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.3914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_718_length_56525_cov_45.702151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_718_length_56525_cov_45.702151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738206_virus.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537892" accession="ERS11139862">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139862</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537892</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738236_provirus.19</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738236.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738236) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561643) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_18_1212__NODE_19_length_180082_cov_35.602483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_18_1212__NODE_19_length_180082_cov_35.602483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738236_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537893" accession="ERS11139863">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139863</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537893</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738236_virus.231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738236.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>453.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738236) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561643) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_18_1212__NODE_231_length_40837_cov_257.005079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_18_1212__NODE_231_length_40837_cov_257.005079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738210_provirus.701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537894" accession="ERS11139864">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139864</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537894</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_provirus.20457</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Inovirus sp. provirus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11996874984972046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.2613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_20457_length_4035_cov_27.215766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_20457_length_4035_cov_27.215766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900545495;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_provirus.20457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; Inovirus; unclassified Inovirus; Inovirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537895" accession="ERS11139865">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139865</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537895</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_provirus.633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.28713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_633_length_63739_cov_4.483004_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_633_length_63739_cov_4.483004_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_provirus.633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537896" accession="ERS11139866">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139866</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537896</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_virus.1036</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.25239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_1036_length_43754_cov_5.176981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43478260869565216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_1036_length_43754_cov_5.176981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_virus.1036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537898" accession="ERS11139868">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139868</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537898</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_virus.2730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cecivirus virus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.6872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_2730_length_19651_cov_34.311485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_2730_length_19651_cov_34.311485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Limosilactobacillus;s__Limosilactobacillus fermentum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_virus.2730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cecivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537899" accession="ERS11139869">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139869</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537899</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_virus.3776</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.7239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_3776_length_15200_cov_24.000860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_3776_length_15200_cov_24.000860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_virus.4024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537900" accession="ERS11139870">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139870</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537900</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_virus.729</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.48821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_729_length_57588_cov_5.379423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35294117647058826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_729_length_57588_cov_5.379423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745433_bin.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_virus.729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537901" accession="ERS11139871">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139871</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537901</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738238_provirus.170</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738238.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>267.777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738238) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560650) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_170_length_80924_cov_132.589979_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.391304347826087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_170_length_80924_cov_132.589979_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738238_provirus.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537902" accession="ERS11139872">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139872</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537902</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738238_virus.1210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738238.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.170090861558364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.92359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738238) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560650) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_1210_length_20646_cov_4.890126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_1210_length_20646_cov_4.890126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Flavonifractor;s__Flavonifractor plautii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738238_virus.1210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537904" accession="ERS11139874">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139874</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537904</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_provirus.16</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_16_length_356441_cov_20.999259_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738239_bin.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8958333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_16_length_356441_cov_20.999259_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_virus.1011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537905" accession="ERS11139875">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139875</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537905</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_provirus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.4467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_53_length_190370_cov_34.189503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738239_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7878787878787878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_53_length_190370_cov_34.189503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-465;g__CAG-465;s__CAG-465 sp000433755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_provirus.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537906" accession="ERS11139876">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139876</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537906</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.1126</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>256.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_1126_length_40298_cov_155.593620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_1126_length_40298_cov_155.593620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537907" accession="ERS11139877">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139877</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537907</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.1494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_1494_length_34132_cov_25.381912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_1494_length_34132_cov_25.381912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.1494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537908" accession="ERS11139878">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139878</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537908</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.195</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_195_length_104037_cov_5.940583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9078947368421052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_195_length_104037_cov_5.940583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537909" accession="ERS11139879">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139879</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537909</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.2809</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_2809_length_23069_cov_8.105515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_2809_length_23069_cov_8.105515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0261276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537910" accession="ERS11139880">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139880</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537910</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.540</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_540_length_59280_cov_6.605392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_540_length_59280_cov_6.605392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537912" accession="ERS11139882">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139882</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537912</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_provirus.1999</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6529216648425764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.95769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1999_length_30500_cov_5.293462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738240_bin.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1999_length_30500_cov_5.293462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.1678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537913" accession="ERS11139883">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139883</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537913</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_provirus.412</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_412_length_80538_cov_8.493730_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738240_bin.101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_412_length_80538_cov_8.493730_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0297475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537914" accession="ERS11139884">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139884</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537914</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.1056</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.63632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1056_length_45926_cov_5.713712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1056_length_45926_cov_5.713712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_provirus.1046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537915" accession="ERS11139885">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139885</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537915</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.1370</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.8645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1370_length_39233_cov_26.873685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1370_length_39233_cov_26.873685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.1370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537916" accession="ERS11139886">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139886</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537916</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.1690</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Teubervirus virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0922356828193833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>363.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1690_length_34020_cov_218.473853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_1690_length_34020_cov_218.473853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Teubervirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537919" accession="ERS11139888">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139888</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537919</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_330_length_91500_cov_7.361988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5957446808510638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_330_length_91500_cov_7.361988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537920" accession="ERS11139889">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139889</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537920</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.5703</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18368843659633977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.0459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_5703_length_13665_cov_19.939965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_5703_length_13665_cov_19.939965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.7471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537921" accession="ERS11139890">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139890</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537921</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.786</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_786_length_55899_cov_12.312726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_786_length_55899_cov_12.312726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537922" accession="ERS11139891">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139891</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537922</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_provirus.1866</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1866_length_27837_cov_6.974099_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738241_bin.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1866_length_27837_cov_6.974099_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_provirus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537923" accession="ERS11139892">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139892</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537923</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_provirus.507</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5298458149779736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_507_length_72502_cov_7.322651_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738241_bin.205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_507_length_72502_cov_7.322651_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738241_provirus.507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537924" accession="ERS11139893">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139893</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537924</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.1014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.1956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1014_length_45685_cov_26.718887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1014_length_45685_cov_26.718887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537925" accession="ERS11139894">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139894</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537925</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.1197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1197_length_39700_cov_8.526184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_1197_length_39700_cov_8.526184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537926" accession="ERS11139895">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139895</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537926</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.205</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_205_length_122221_cov_7.175825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_205_length_122221_cov_7.175825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537928" accession="ERS11139897">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139897</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537928</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_397_length_82689_cov_8.787973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_397_length_82689_cov_8.787973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_provirus.322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537929" accession="ERS11139898">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139898</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537929</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.1597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_730_length_57624_cov_43.754670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_730_length_57624_cov_43.754670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738241_virus.730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537930" accession="ERS11139899">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139899</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537930</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_provirus.273</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.8076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_273_length_72940_cov_17.138754_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738242_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_273_length_72940_cov_17.138754_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537931" accession="ERS11139900">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139900</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537931</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_provirus.85</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_85_length_125326_cov_45.458910_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_85_length_125326_cov_45.458910_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_provirus.1065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537932" accession="ERS11139901">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139901</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537932</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_virus.1297</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.26582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_1297_length_31322_cov_4.274316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_1297_length_31322_cov_4.274316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537934" accession="ERS11139903">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139903</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537934</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_virus.2921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01358752575482346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_2921_length_18971_cov_10.421827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_2921_length_18971_cov_10.421827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537935" accession="ERS11139904">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139904</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537935</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_virus.796</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_796_length_41540_cov_6.855823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_796_length_41540_cov_6.855823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537936" accession="ERS11139905">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139905</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537936</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_provirus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02597739359623315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.1708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_217_length_109696_cov_19.575448_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738243_bin.140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_217_length_109696_cov_19.575448_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__UBA644;s__UBA644 sp900547165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_provirus.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537937" accession="ERS11139906">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139906</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537937</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_provirus.55</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11976362772252648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_55_length_190851_cov_7.429838_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738243_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_55_length_190851_cov_7.429838_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537938" accession="ERS11139907">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139907</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537938</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.1171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_1171_length_39547_cov_17.413327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7837837837837838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_1171_length_39547_cov_17.413327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.1171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537939" accession="ERS11139908">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139908</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537939</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.1792</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_1792_length_29395_cov_15.146531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738243_bin.10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_1792_length_29395_cov_15.146531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.1792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537940" accession="ERS11139909">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139909</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537940</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1013.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_239_length_104652_cov_621.357896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6274509803921569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_239_length_104652_cov_621.357896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537942" accession="ERS11139911">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139911</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537942</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_623_length_61104_cov_16.692792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_623_length_61104_cov_16.692792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537943" accession="ERS11139912">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139912</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537943</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.928</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_928_length_46439_cov_17.610047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_928_length_46439_cov_17.610047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537944" accession="ERS11139913">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139913</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537944</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_provirus.232</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6799559471365638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>112.312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_232_length_72256_cov_68.528131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_232_length_72256_cov_68.528131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537945" accession="ERS11139914">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139914</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537945</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_provirus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_80_length_115202_cov_8.274467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738244_bin.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_80_length_115202_cov_8.274467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537946" accession="ERS11139915">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139915</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537946</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_virus.117</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.8235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_117_length_98451_cov_22.556255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9696969696969696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_117_length_98451_cov_22.556255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_virus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537948" accession="ERS11139917">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139917</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537948</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_virus.493</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10825856553536624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_493_length_49989_cov_6.014145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_493_length_49989_cov_6.014145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537949" accession="ERS11139918">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139918</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537949</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_virus.896</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.56614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_896_length_34820_cov_4.997784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_896_length_34820_cov_4.997784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__Dorea formicigenerans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537950" accession="ERS11139919">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139919</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537950</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_provirus.24</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_24_length_315731_cov_10.283766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738245_bin.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_24_length_315731_cov_10.283766_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745434_provirus.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537951" accession="ERS11139920">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139920</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537951</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_provirus.5</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>5.538234328848e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.1804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_5_length_495339_cov_75.710168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_5_length_495339_cov_75.710168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537952" accession="ERS11139921">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139921</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537952</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_provirus.802</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_802_length_52937_cov_7.611880_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8928571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_802_length_52937_cov_7.611880_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537953" accession="ERS11139922">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139922</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537953</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_virus.1660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_1660_length_31205_cov_11.253341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_1660_length_31205_cov_11.253341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745908_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-433;s__CAG-433 sp000433675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537954" accession="ERS11139923">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139923</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537954</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_virus.2204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.95703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_2204_length_24820_cov_4.086368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_2204_length_24820_cov_4.086368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738591_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UMGS1603;s__UMGS1603 sp900553265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.2204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537955" accession="ERS11139924">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139924</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537955</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738245_virus.3841</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738245.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.2987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738245) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561113) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_3841_length_15144_cov_12.051245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_21_1589__NODE_3841_length_15144_cov_12.051245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745613_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;s__Marvinbryantia sp014385005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.3347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537957" accession="ERS11139926">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139926</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537957</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738246_provirus.45</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738246.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.0912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738246) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_45_length_153820_cov_29.305829_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738246_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7837837837837838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_45_length_153820_cov_29.305829_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738173_virus.612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537958" accession="ERS11139927">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139927</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537958</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738246_virus.270</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738246.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6649779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1611.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738246) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_270_length_60775_cov_781.263600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_270_length_60775_cov_781.263600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0338166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537959" accession="ERS11139928">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139928</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537959</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738246_virus.610</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738246.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738246) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_610_length_31689_cov_10.056086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738246_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_610_length_31689_cov_10.056086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738173_virus.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537960" accession="ERS11139929">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139929</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537960</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_provirus.156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00379359296842034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.8218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_156_length_147936_cov_39.052645_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738247_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_156_length_147936_cov_39.052645_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_provirus.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537961" accession="ERS11139930">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139930</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537961</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_provirus.592</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>122.037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_592_length_72372_cov_84.149263_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_592_length_72372_cov_84.149263_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-465;g__CAG-465;s__CAG-465 sp000433755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738247_provirus.592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537963" accession="ERS11139932">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139932</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537963</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_virus.1722</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2498898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>930.612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_1722_length_35270_cov_540.975251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_1722_length_35270_cov_540.975251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738273_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__WG-1;s__WG-1 sp900539665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738247_virus.1722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537964" accession="ERS11139933">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139933</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537964</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_virus.2336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_2336_length_27329_cov_8.575627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_2336_length_27329_cov_8.575627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.2569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537965" accession="ERS11139934">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139934</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537965</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_virus.634</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03480176211453744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.7885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_634_length_69858_cov_31.011966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_634_length_69858_cov_31.011966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738247_virus.634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537966" accession="ERS11139935">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139935</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537966</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_provirus.1198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.6932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1198_length_44389_cov_20.978674_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738248_bin.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1198_length_44389_cov_20.978674_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_provirus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537967" accession="ERS11139936">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139936</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537967</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_provirus.2054</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.802477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2054_length_31731_cov_10.732672_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2054_length_31731_cov_10.732672_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_provirus.763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537968" accession="ERS11139937">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139937</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537968</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_provirus.718</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_718_length_60917_cov_8.211769_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738248_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_718_length_60917_cov_8.211769_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_provirus.718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537969" accession="ERS11139938">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139938</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537969</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.1382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>101.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1382_length_40906_cov_68.131941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1382_length_40906_cov_68.131941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0341270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537971" accession="ERS11139940">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139940</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537971</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.2022</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.7788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2022_length_32015_cov_16.642714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2022_length_32015_cov_16.642714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745382_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537972" accession="ERS11139941">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139941</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537972</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.2661</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan351 virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.63451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2661_length_26367_cov_4.529479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_2661_length_26367_cov_4.529479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME269197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__KLE1615;s__KLE1615 sp900066985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.1149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537973" accession="ERS11139942">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139942</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537973</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.572</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.9647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_572_length_70189_cov_19.039808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_572_length_70189_cov_19.039808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_virus.572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537974" accession="ERS11139943">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139943</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537974</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.948</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.41285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_948_length_51261_cov_5.658917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_948_length_51261_cov_5.658917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537975" accession="ERS11139944">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139944</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537975</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_provirus.480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_480_length_64522_cov_8.288665_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_480_length_64522_cov_8.288665_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME007974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp003516765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_provirus.480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537977" accession="ERS11139946">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139946</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537977</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.1433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03884348183251045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.25674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_1433_length_29468_cov_5.391719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_1433_length_29468_cov_5.391719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_provirus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537978" accession="ERS11139947">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139947</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537978</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.2352</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24573785867882625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_2352_length_18984_cov_39.528514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_2352_length_18984_cov_39.528514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>uncultured podovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_virus.2352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537979" accession="ERS11139948">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139948</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537979</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.363</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>554.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_363_length_76388_cov_330.993802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_363_length_76388_cov_330.993802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745420_virus.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537980" accession="ERS11139949">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139949</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537980</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.555</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08631662166094103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.6176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_555_length_59486_cov_53.796697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_555_length_59486_cov_53.796697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537981" accession="ERS11139950">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139950</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537981</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.796</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.7353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_796_length_46489_cov_11.815436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_796_length_46489_cov_11.815436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537982" accession="ERS11139951">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139951</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537982</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_provirus.209</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_209_length_85802_cov_11.447711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_209_length_85802_cov_11.447711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537983" accession="ERS11139952">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139952</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537983</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_provirus.6</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.2813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_6_length_532051_cov_14.074169_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738250_bin.50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_6_length_532051_cov_14.074169_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_provirus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537984" accession="ERS11139953">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139953</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537984</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_virus.1538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_1538_length_23438_cov_12.210308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_1538_length_23438_cov_12.210308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_virus.1538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537986" accession="ERS11139955">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139955</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537986</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_virus.361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.7252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_361_length_61901_cov_47.919497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7049180327868853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_361_length_61901_cov_47.919497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537987" accession="ERS11139956">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139956</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537987</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_virus.598</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.8517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_598_length_43555_cov_28.388242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_598_length_43555_cov_28.388242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745823_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__SFJ001;s__SFJ001 sp004555865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_virus.598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537988" accession="ERS11139957">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139957</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537988</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_virus.908</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_908_length_33708_cov_8.521662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_908_length_33708_cov_8.521662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_virus.908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537989" accession="ERS11139958">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139958</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537989</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_provirus.223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1786756314816957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_223_length_101696_cov_6.683662_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738251_bin.222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_223_length_101696_cov_6.683662_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537990" accession="ERS11139959">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139959</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537990</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_provirus.633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe provirus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_633_length_54991_cov_8.382635_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5490196078431373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_633_length_54991_cov_8.382635_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537992" accession="ERS11139961">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139961</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537992</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_virus.2241</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.5732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_2241_length_24515_cov_10.259105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_2241_length_24515_cov_10.259105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537993" accession="ERS11139962">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139962</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537993</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_virus.37</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_37_length_234413_cov_8.073019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_37_length_234413_cov_8.073019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537994" accession="ERS11139963">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139963</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537994</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_virus.986</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03518635291109225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.68118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_986_length_42688_cov_5.063763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_986_length_42688_cov_5.063763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537995" accession="ERS11139964">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139964</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537995</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_provirus.222</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.7151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_222_length_109479_cov_24.366657_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738252_bin.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_222_length_109479_cov_24.366657_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_provirus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537996" accession="ERS11139965">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139965</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537996</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_provirus.487</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_487_length_72581_cov_46.443713_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738252_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_487_length_72581_cov_46.443713_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME116282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-873;s__CAG-873 sp001701165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_provirus.487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537997" accession="ERS11139966">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139966</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537997</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.1003</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.1053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1003_length_45916_cov_30.539541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1003_length_45916_cov_30.539541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537998" accession="ERS11139967">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139967</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537998</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.1184</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.38227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1184_length_40569_cov_5.590882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1184_length_40569_cov_5.590882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.1184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538000" accession="ERS11139969">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139969</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538000</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.1561</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.25301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1561_length_33481_cov_4.628248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_1561_length_33481_cov_4.628248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.1561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538001" accession="ERS11139970">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139970</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538001</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.2256</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_2256_length_25264_cov_6.088538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_2256_length_25264_cov_6.088538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.2256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538002" accession="ERS11139971">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139971</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538002</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.3319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Erysipelothrix phage SE-1 virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.5769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_3319_length_18758_cov_42.500027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_3319_length_18758_cov_42.500027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738447_bin.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-877;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Erysipelothrix phage SE-1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538003" accession="ERS11139972">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139972</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538003</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.410</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.0537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_410_length_81489_cov_44.950671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_410_length_81489_cov_44.950671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538004" accession="ERS11139973">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139973</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538004</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>209.366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_538_length_68012_cov_123.522794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_538_length_68012_cov_123.522794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538006" accession="ERS11139975">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139975</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538006</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.861</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.0198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_861_length_51298_cov_18.015189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4634146341463415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_861_length_51298_cov_18.015189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.1369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538007" accession="ERS11139976">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139976</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538007</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_provirus.14</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.3195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_14_length_377029_cov_47.716370_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738253_bin.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_14_length_377029_cov_47.716370_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538008" accession="ERS11139977">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139977</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538008</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_provirus.2903</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.96901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2903_length_27517_cov_4.283673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738253_bin.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2903_length_27517_cov_4.283673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746345_bin.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UBA791;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.2903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538009" accession="ERS11139978">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139978</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538009</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_provirus.808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.4722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_808_length_73612_cov_15.486245_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738253_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_808_length_73612_cov_15.486245_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538010" accession="ERS11139979">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139979</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538010</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.1103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6049559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.7813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1103_length_59490_cov_34.418477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1103_length_59490_cov_34.418477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738152_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900546445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.1428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538011" accession="ERS11139980">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139980</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538011</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.1300</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1300_length_53174_cov_10.513984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1300_length_53174_cov_10.513984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538012" accession="ERS11139981">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139981</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538012</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.1551</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1551_length_46144_cov_5.746435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1551_length_46144_cov_5.746435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538013" accession="ERS11139982">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139982</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538013</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.1647</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.9895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1647_length_43994_cov_36.614295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1647_length_43994_cov_36.614295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.1950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538015" accession="ERS11139984">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139984</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538015</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.18900</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_18900_length_4735_cov_8.864105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_18900_length_4735_cov_8.864105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.27072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538016" accession="ERS11139985">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139985</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538016</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.2104</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.69039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2104_length_36247_cov_4.267321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2104_length_36247_cov_4.267321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.2104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538017" accession="ERS11139986">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139986</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538017</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.2514</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12738393726507272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2514_length_30972_cov_7.385111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_2514_length_30972_cov_7.385111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.2514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538018" accession="ERS11139987">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139987</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538018</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.3162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.1828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_3162_length_25480_cov_18.078022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_3162_length_25480_cov_18.078022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0256920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538019" accession="ERS11139988">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139988</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538019</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.3822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7449339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_3822_length_21432_cov_6.075626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_3822_length_21432_cov_6.075626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538021" accession="ERS11139990">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139990</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538021</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.583</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.7493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_583_length_91495_cov_54.057593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_583_length_91495_cov_54.057593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900545925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538022" accession="ERS11139991">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139991</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538022</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.940</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_940_length_66128_cov_17.005087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_940_length_66128_cov_17.005087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;s__UMGS973 sp900547295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538023" accession="ERS11139992">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139992</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538023</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_provirus.334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_334_length_69313_cov_20.654818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738254_bin.109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_334_length_69313_cov_20.654818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_provirus.334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538024" accession="ERS11139993">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139993</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538024</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_provirus.917</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus provirus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_917_length_37406_cov_7.396876_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_917_length_37406_cov_7.396876_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME043942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;s__CAG-103 sp900317855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538025" accession="ERS11139994">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139994</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538025</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.128</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_128_length_113107_cov_9.699133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_128_length_113107_cov_9.699133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_virus.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538026" accession="ERS11139995">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139995</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538026</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.1536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.43186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_1536_length_26480_cov_5.351968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_1536_length_26480_cov_5.351968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538027" accession="ERS11139996">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139996</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538027</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.1844</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.50862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_1844_length_23411_cov_4.182009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_1844_length_23411_cov_4.182009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.1811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538029" accession="ERS11139998">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139998</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538029</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.363</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.09539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_363_length_66549_cov_4.518700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_363_length_66549_cov_4.518700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538030" accession="ERS11139999">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139999</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538030</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.477</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.5925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_477_length_56999_cov_21.774323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738254_bin.109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_477_length_56999_cov_21.774323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538031" accession="ERS11140000">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140000</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538031</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.567</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.60557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_567_length_51526_cov_5.660227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_567_length_51526_cov_5.660227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538032" accession="ERS11140001">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140001</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538032</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.723</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.58021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_723_length_44274_cov_4.894359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_723_length_44274_cov_4.894359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.2161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538033" accession="ERS11140002">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140002</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538033</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.804</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bdellovibrio phage phi1422 virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.7769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_804_length_41302_cov_35.758108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_804_length_41302_cov_35.758108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Bdellovibrio phage phi1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538035" accession="ERS11140004">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140004</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538035</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_provirus.224</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_224_length_85233_cov_8.416036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_224_length_85233_cov_8.416036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_provirus.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538036" accession="ERS11140005">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140005</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538036</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_provirus.394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_394_length_63499_cov_6.164612_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738256_bin.50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_394_length_63499_cov_6.164612_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738152_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900546445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538037" accession="ERS11140006">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140006</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538037</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.1025</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_1025_length_35570_cov_12.083932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6176470588235294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_1025_length_35570_cov_12.083932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538038" accession="ERS11140007">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140007</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538038</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_157_length_100743_cov_9.145372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_157_length_100743_cov_9.145372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538039" accession="ERS11140008">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140008</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538039</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_221_length_85847_cov_7.343873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5405405405405406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_221_length_85847_cov_7.343873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538040" accession="ERS11140009">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140009</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538040</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.445</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.4596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_445_length_58974_cov_16.222218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_445_length_58974_cov_16.222218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538041" accession="ERS11140010">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140010</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538041</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>185.139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_677_length_45625_cov_108.466936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_677_length_45625_cov_108.466936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537853" accession="ERS11139823">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139823</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537853</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738230_virus.47</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738230.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738230) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561839) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_47_length_235658_cov_7.348971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5573770491803278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_I_8_1124__NODE_47_length_235658_cov_7.348971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738628_bin.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA1394;s__UBA1394 sp900554975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537874" accession="ERS11139844">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139844</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537874</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738233_virus.371</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559747) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_371_length_76818_cov_6.433627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.41379310344827586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_8_2357__NODE_371_length_76818_cov_6.433627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_provirus.192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537882" accession="ERS11139852">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139852</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537882</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738234_virus.618</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738234.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.3513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738234) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_618_length_63250_cov_45.872841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_17_2661__NODE_618_length_63250_cov_45.872841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537888" accession="ERS11139858">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139858</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537888</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738235_virus.1463</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738235.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738235) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559615) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1463_length_36277_cov_6.371436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_12_2311__NODE_1463_length_36277_cov_6.371436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_provirus.739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537897" accession="ERS11139867">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139867</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537897</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738237_virus.1392</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pseudomonas phage PPAT virus assembled from ERR7738237.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>162.964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738237) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560661) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_1392_length_33887_cov_93.635644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_1_1708__NODE_1392_length_33887_cov_93.635644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_virus.1460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Pseudomonas phage PPAT</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537903" accession="ERS11139873">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139873</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537903</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738238_virus.4305</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738238.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.48951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738238) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560650) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_4305_length_6869_cov_4.521790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_3_1720__NODE_4305_length_6869_cov_4.521790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME089567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus sp001556435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738238_virus.4305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537911" accession="ERS11139881">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139881</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537911</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738239_virus.751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738239.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738239) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562116) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_751_length_49752_cov_8.438671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_17_1528__NODE_751_length_49752_cov_8.438671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537917" accession="ERS11139887">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139887</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537917</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738240_virus.2136</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7738240.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1960214464596693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.03402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738240) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562259) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_2136_length_28959_cov_5.174295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_17_1504__NODE_2136_length_28959_cov_5.174295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537927" accession="ERS11139896">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139896</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537927</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738241_virus.2891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738241.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04719162995594714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738241) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_2891_length_19166_cov_11.019540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_4_1602__NODE_2891_length_19166_cov_11.019540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537933" accession="ERS11139902">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139902</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537933</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738242_virus.16</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738242.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738242) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561200) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_16_length_225184_cov_12.306001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_14_1607__NODE_16_length_225184_cov_12.306001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738242_virus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537941" accession="ERS11139910">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139910</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537941</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738243_virus.437</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7738243.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.1392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738243) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_437_length_74939_cov_38.976063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_1_2651__NODE_437_length_74939_cov_38.976063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537947" accession="ERS11139916">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139916</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537947</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738244_virus.336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738244.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.87277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738244) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561180) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_336_length_60428_cov_5.836457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_24_1613__NODE_336_length_60428_cov_5.836457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537956" accession="ERS11139925">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139925</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537956</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738246_provirus.125</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738246.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1674559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.7204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738246) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_125_length_98526_cov_43.958638_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738246_bin.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_24_1344__NODE_125_length_98526_cov_43.958638_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738246_provirus.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537962" accession="ERS11139931">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139931</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537962</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738247_virus.1269</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7738247.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>7.8965839962564e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.1459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738247) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560527) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_1269_length_44094_cov_30.641752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_22_1922__NODE_1269_length_44094_cov_30.641752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738152_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900546445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.1061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537970" accession="ERS11139939">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139939</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537970</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738248_virus.1669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738248.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738248) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567741) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1669_length_35967_cov_6.107189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_18_2379__NODE_1669_length_35967_cov_6.107189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537976" accession="ERS11139945">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139945</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537976</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738249_virus.1064</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738249.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>166.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738249) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559438) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_1064_length_37340_cov_100.184446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_23_2656__NODE_1064_length_37340_cov_100.184446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_virus.1064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537985" accession="ERS11139954">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139954</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537985</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738250_virus.1889</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738250.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738250) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_1889_length_19859_cov_5.767566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_6_1832__NODE_1889_length_19859_cov_5.767566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0167695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537991" accession="ERS11139960">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139960</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537991</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738251_virus.106</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738251.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738251) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_106_length_141988_cov_8.218736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_23_1525__NODE_106_length_141988_cov_8.218736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13537999" accession="ERS11139968">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139968</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13537999</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.13</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13537999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_13_length_353189_cov_64.297458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5045045045045045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_13_length_353189_cov_64.297458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538005" accession="ERS11139974">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139974</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538005</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738252_virus.693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738252.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.87534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738252) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_693_length_58260_cov_5.901243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_1_1963__NODE_693_length_58260_cov_5.901243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738537_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA1381;g__UBA4716;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538014" accession="ERS11139983">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139983</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538014</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.1793</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.3974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1793_length_41053_cov_31.826825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_1793_length_41053_cov_31.826825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738253_bin.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;s__Ruminiclostridium_E sp900556525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538020" accession="ERS11139989">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139989</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538020</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738253_virus.4521</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738253.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03243392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738253) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561663) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_4521_length_18306_cov_6.102858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738253_bin.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_24_1194__NODE_4521_length_18306_cov_6.102858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538028" accession="ERS11139997">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11139997</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538028</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.2456</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bcepmuvirus virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22052497998750176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.64779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_2456_length_18917_cov_3.718206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_2456_length_18917_cov_3.718206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_provirus.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Bcepmuvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538034" accession="ERS11140003">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140003</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538034</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738254_virus.967</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738254.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.7743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738254) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_967_length_36154_cov_5.341908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_2_1148__NODE_967_length_36154_cov_5.341908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538042" accession="ERS11140011">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140011</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538042</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738256_virus.932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738256.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738256) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561283) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_932_length_38110_cov_6.365551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_11_1304__NODE_932_length_38110_cov_6.365551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.1811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538043" accession="ERS11140012">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140012</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538043</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_provirus.393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>232.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_393_length_62049_cov_146.322985_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_393_length_62049_cov_146.322985_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_provirus.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538044" accession="ERS11140013">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140013</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538044</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_provirus.68</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.1779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_68_length_143002_cov_21.517453_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738257_bin.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_68_length_143002_cov_21.517453_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_provirus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538045" accession="ERS11140014">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140014</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538045</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.1153</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_1153_length_33721_cov_4.363750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_1153_length_33721_cov_4.363750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_virus.1153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538046" accession="ERS11140015">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140015</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538046</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.1547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_1547_length_27643_cov_8.998368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_1547_length_27643_cov_8.998368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.1765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538047" accession="ERS11140016">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140016</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538047</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.2853</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02856549932310539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_2853_length_17698_cov_6.609897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_2853_length_17698_cov_6.609897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_virus.2853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538048" accession="ERS11140017">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140017</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538048</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.562</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.5107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_562_length_51887_cov_9.450704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5161290322580645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_562_length_51887_cov_9.450704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538049" accession="ERS11140018">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140018</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538049</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.78</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_78_length_135533_cov_6.272435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_78_length_135533_cov_6.272435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_virus.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538050" accession="ERS11140019">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140019</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538050</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738257_virus.912</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7738257.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.3993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738257) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_912_length_39395_cov_13.745307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_18_1188__NODE_912_length_39395_cov_13.745307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_virus.912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538051" accession="ERS11140020">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140020</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538051</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738258_provirus.373</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh provirus assembled from ERR7738258.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738258) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567482) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_373_length_70877_cov_9.464336_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738258_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_373_length_70877_cov_9.464336_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738451_virus.1084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538052" accession="ERS11140021">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140021</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538052</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738258_virus.1378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738258.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.27121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738258) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567482) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_1378_length_26344_cov_4.650969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_1378_length_26344_cov_4.650969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538053" accession="ERS11140022">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140022</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538053</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738258_virus.299</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738258.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.0822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738258) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567482) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_299_length_79348_cov_55.942766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.627906976744186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_299_length_79348_cov_55.942766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738551_provirus.252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538054" accession="ERS11140023">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140023</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538054</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738258_virus.705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738258.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.23463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738258) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567482) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_705_length_46338_cov_4.408919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_705_length_46338_cov_4.408919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0354261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538055" accession="ERS11140024">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140024</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538055</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738258_virus.889</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738258.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>148.636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738258) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567482) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_889_length_39187_cov_87.568346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_5_THA1064JZ__NODE_889_length_39187_cov_87.568346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.1413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538056" accession="ERS11140025">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140025</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538056</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_provirus.230</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.2726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_230_length_80833_cov_41.243140_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738259_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_230_length_80833_cov_41.243140_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_provirus.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538057" accession="ERS11140026">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140026</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538057</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_virus.1337</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh virus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_1337_length_28266_cov_4.864344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_1337_length_28266_cov_4.864344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737974_provirus.828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538058" accession="ERS11140027">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140027</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538058</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_virus.251</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_251_length_76814_cov_12.953008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3611111111111111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_251_length_76814_cov_12.953008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538059" accession="ERS11140028">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140028</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538059</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_virus.458</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_458_length_55758_cov_47.532839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_458_length_55758_cov_47.532839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538060" accession="ERS11140029">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140029</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538060</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_virus.692</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_692_length_43825_cov_9.615182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_692_length_43825_cov_9.615182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538061" accession="ERS11140030">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140030</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538061</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738259_virus.956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738259.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738259) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560362) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_956_length_35728_cov_7.727553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_M_13_1797__NODE_956_length_35728_cov_7.727553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.1152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538062" accession="ERS11140031">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140031</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538062</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_provirus.38</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_38_length_185058_cov_30.332337_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9016393442622952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_38_length_185058_cov_30.332337_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738260_provirus.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538063" accession="ERS11140032">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140032</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538063</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_provirus.798</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_798_length_46181_cov_6.330058_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8292682926829268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_798_length_46181_cov_6.330058_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lacticaseibacillus;s__Lacticaseibacillus paracasei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738260_provirus.798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538064" accession="ERS11140033">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140033</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538064</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_virus.1454</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.26745594713656384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_1454_length_31319_cov_54.623776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_1454_length_31319_cov_54.623776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_provirus.1220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538065" accession="ERS11140034">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140034</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538065</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_virus.2058</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_2058_length_24230_cov_5.992589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_2058_length_24230_cov_5.992589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538066" accession="ERS11140035">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140035</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538066</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_virus.330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>152.968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_330_length_74869_cov_74.703551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738260_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_330_length_74869_cov_74.703551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538067" accession="ERS11140036">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140036</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538067</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738260_virus.586</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738260.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738260) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560003) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_586_length_54907_cov_9.449188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_1_1976__NODE_586_length_54907_cov_9.449188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738260_virus.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538068" accession="ERS11140037">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140037</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538068</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_provirus.146</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_146_length_127636_cov_10.082111_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738261_bin.159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_146_length_127636_cov_10.082111_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745594_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__UBA636 sp900546285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_provirus.146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538069" accession="ERS11140038">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140038</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538069</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_provirus.283</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.0446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_283_length_91891_cov_23.754014_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_283_length_91891_cov_23.754014_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_provirus.139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538070" accession="ERS11140039">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140039</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538070</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_provirus.716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_716_length_55843_cov_7.778969_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738261_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_716_length_55843_cov_7.778969_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Bariatricus;s__Bariatricus comes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_provirus.716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538071" accession="ERS11140040">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140040</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538071</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_virus.1664</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_1664_length_32340_cov_9.506215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738261_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_1664_length_32340_cov_9.506215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.1973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538072" accession="ERS11140041">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140041</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538072</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_virus.285</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.7704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_285_length_91614_cov_13.181446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.935483870967742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_285_length_91614_cov_13.181446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538073" accession="ERS11140042">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140042</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538073</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738261_virus.869</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738261.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738261) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560322) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_869_length_50253_cov_22.934869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_23_1769__NODE_869_length_50253_cov_22.934869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538074" accession="ERS11140043">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140043</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538074</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738262_provirus.456</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738262.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738262) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567395) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_456_length_65670_cov_13.106795_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_456_length_65670_cov_13.106795_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0346573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538075" accession="ERS11140044">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140044</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538075</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738262_virus.1641</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Minhovirus virus assembled from ERR7738262.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5794.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738262) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567395) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_1641_length_19611_cov_3323.641599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_1641_length_19611_cov_3323.641599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738262_virus.1641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Rountreeviridae; Sarlesvirinae; Minhovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538076" accession="ERS11140045">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140045</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538076</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738262_virus.774</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738262.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>449.062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738262) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567395) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_774_length_41762_cov_263.813962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_11_THA0065JZ__NODE_774_length_41762_cov_263.813962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_C</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0303461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538077" accession="ERS11140046">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140046</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538077</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_provirus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.7982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_161_length_146848_cov_16.031948_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738263_bin.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7837837837837838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_161_length_146848_cov_16.031948_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0272478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538078" accession="ERS11140047">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140047</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538078</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_provirus.299</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.94721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_299_length_113429_cov_5.789673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738263_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_299_length_113429_cov_5.789673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538079" accession="ERS11140048">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140048</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538079</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_provirus.52</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.3404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_52_length_254286_cov_44.562116_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738263_bin.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_52_length_254286_cov_44.562116_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538080" accession="ERS11140049">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140049</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538080</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_provirus.913</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07305889139359004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_913_length_61081_cov_6.524441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738263_bin.101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_913_length_61081_cov_6.524441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__CAG-269 sp001916005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738263_provirus.913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538081" accession="ERS11140050">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140050</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538081</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.134</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2849118942731278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_134_length_160185_cov_50.412284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6607142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_134_length_160185_cov_50.412284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538082" accession="ERS11140051">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140051</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538082</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.1650</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_1650_length_39828_cov_10.790144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_1650_length_39828_cov_10.790144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538083" accession="ERS11140052">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140052</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538083</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.2038</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.3491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_2038_length_33807_cov_11.416513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_2038_length_33807_cov_11.416513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_virus.1355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538084" accession="ERS11140053">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140053</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538084</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.3165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.902477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.2982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_3165_length_23487_cov_4.113071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738263_bin.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_3165_length_23487_cov_4.113071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538085" accession="ERS11140054">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140054</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538085</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.66</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_66_length_222718_cov_18.327473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_66_length_222718_cov_18.327473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738263_virus.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538086" accession="ERS11140055">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140055</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538086</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738263_virus.986</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738263.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.2321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738263) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_986_length_58167_cov_24.774591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_17_2510__NODE_986_length_58167_cov_24.774591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538087" accession="ERS11140056">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140056</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538087</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_provirus.1947</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.57335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1947_length_31518_cov_4.827296_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1947_length_31518_cov_4.827296_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538088" accession="ERS11140057">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140057</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538088</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_provirus.407</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_407_length_87478_cov_8.673116_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738264_bin.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_407_length_87478_cov_8.673116_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-977;g__UBA2903;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_provirus.407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538089" accession="ERS11140058">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140058</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538089</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_provirus.983</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_983_length_49974_cov_14.926529_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738264_bin.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_983_length_49974_cov_14.926529_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745433_bin.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_provirus.983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538090" accession="ERS11140059">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140059</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538090</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.1196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1196_length_43757_cov_14.141598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1196_length_43757_cov_14.141598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.187853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538091" accession="ERS11140060">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140060</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538091</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.1634</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1634_length_35845_cov_7.080044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_1634_length_35845_cov_7.080044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.1634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538092" accession="ERS11140061">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140061</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538092</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.2083</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0873898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_2083_length_30147_cov_6.585434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7837837837837838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_2083_length_30147_cov_6.585434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538093" accession="ERS11140062">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140062</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538093</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.2899</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1262875439947041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.63793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_2899_length_23648_cov_5.032328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_2899_length_23648_cov_5.032328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538094" accession="ERS11140063">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140063</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538094</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.3542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05501014145734777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.1749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_3542_length_20285_cov_27.041865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_3542_length_20285_cov_27.041865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.3846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538095" accession="ERS11140064">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140064</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538095</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.1122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_677_length_63660_cov_26.338864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_677_length_63660_cov_26.338864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538096" accession="ERS11140065">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140065</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538096</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738264_virus.916</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738264.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17129856875493005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.4725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738264) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_916_length_52422_cov_13.477620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_13_1594__NODE_916_length_52422_cov_13.477620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738169_virus.588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538097" accession="ERS11140066">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140066</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538097</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738265_provirus.507</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738265.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2824339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>92.3621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738265) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_507_length_54152_cov_55.403329_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_507_length_54152_cov_55.403329_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME239670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium pseudocatenulatum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738265_provirus.507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538098" accession="ERS11140067">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140067</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538098</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738265_virus.268</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738265.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738265) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_268_length_83275_cov_14.165737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7446808510638298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_268_length_83275_cov_14.165737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME283914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738653_provirus.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538099" accession="ERS11140068">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140068</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538099</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738265_virus.649</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Kagunavirus virus assembled from ERR7738265.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738265) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567500) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_649_length_45245_cov_6.110233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6891891891891891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_23_THA1066YZ__NODE_649_length_45245_cov_6.110233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738265_virus.649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Guernseyvirinae; Kagunavirus; unclassified Kagunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538100" accession="ERS11140069">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140069</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538100</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_provirus.171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12951046580442682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.9583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_171_length_125421_cov_16.932968_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738266_bin.126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_171_length_125421_cov_16.932968_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746433_provirus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538101" accession="ERS11140070">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140070</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538101</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_provirus.507</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04234581497797356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.0735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_507_length_70536_cov_12.904427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738266_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_507_length_70536_cov_12.904427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_provirus.361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538102" accession="ERS11140071">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140071</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538102</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.1045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.8881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1045_length_44410_cov_19.069361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1045_length_44410_cov_19.069361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538103" accession="ERS11140072">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140072</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538103</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.1304</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.23672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1304_length_38943_cov_4.540138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1304_length_38943_cov_4.540138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Thermoplasmatota;c__Thermoplasmata;o__Methanomassiliicoccales;f__Methanomethylophilaceae;g__UBA71;s__UBA71 sp006954465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538104" accession="ERS11140073">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140073</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538104</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.1660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1660_length_33207_cov_7.963960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1660_length_33207_cov_7.963960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738571_bin.280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.1529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538105" accession="ERS11140074">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140074</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538105</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.1963</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1963_length_29812_cov_9.860905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_1963_length_29812_cov_9.860905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.1483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538106" accession="ERS11140075">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140075</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538106</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.2614</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_2614_length_24345_cov_8.248269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_2614_length_24345_cov_8.248269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFFH01;s__SFFH01 sp900542395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745408_virus.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538107" accession="ERS11140076">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140076</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538107</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_378_length_83708_cov_11.000060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_378_length_83708_cov_11.000060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538108" accession="ERS11140077">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140077</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538108</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738266_virus.771</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738266.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1253845907965548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738266) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561381) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_771_length_54165_cov_14.344624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_8_1508__NODE_771_length_54165_cov_14.344624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745881_virus.717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538109" accession="ERS11140078">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140078</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538109</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_provirus.1240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_1240_length_33215_cov_10.412004_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738267_bin.146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_1240_length_33215_cov_10.412004_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.1633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538110" accession="ERS11140079">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140079</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538110</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_provirus.444</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05275243220624644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_444_length_62402_cov_9.008263_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_444_length_62402_cov_9.008263_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_provirus.810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538111" accession="ERS11140080">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140080</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538111</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_provirus.91</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_91_length_134647_cov_9.611266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4918032786885246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_91_length_134647_cov_9.611266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738267_provirus.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538112" accession="ERS11140081">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140081</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538112</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_virus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>337.984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_145_length_114131_cov_194.221369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738267_bin.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9027777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_145_length_114131_cov_194.221369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538113" accession="ERS11140082">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140082</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538113</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_virus.2049</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.7432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_2049_length_22701_cov_12.531648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_2049_length_22701_cov_12.531648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538114" accession="ERS11140083">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140083</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538114</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738267_virus.451</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738267.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.6842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738267) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561865) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_451_length_62038_cov_41.345395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_18_2352__NODE_451_length_62038_cov_41.345395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538115" accession="ERS11140084">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140084</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538115</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_provirus.1565</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.61767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1565_length_34756_cov_4.889039_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1565_length_34756_cov_4.889039_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0276909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538116" accession="ERS11140085">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140085</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538116</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_provirus.649</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07714757709251101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>127.898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_649_length_63832_cov_77.628013_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738268_bin.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_649_length_63832_cov_77.628013_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_provirus.649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538117" accession="ERS11140086">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140086</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538117</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_virus.1347</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1347_length_38649_cov_14.257026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1347_length_38649_cov_14.257026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745867_virus.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538118" accession="ERS11140087">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140087</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538118</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_virus.1552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.7972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1552_length_34982_cov_25.343160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_1552_length_34982_cov_25.343160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538119" accession="ERS11140088">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140088</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538119</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_virus.2079</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_2079_length_28273_cov_13.710881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_2079_length_28273_cov_13.710881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538120" accession="ERS11140089">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140089</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538120</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_virus.2786</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12096128563594025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_2786_length_22211_cov_12.022409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_2786_length_22211_cov_12.022409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.2786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538121" accession="ERS11140090">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140090</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538121</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738268_virus.758</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738268.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.9904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738268) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561917) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_758_length_58177_cov_13.182754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6511627906976745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_19_1056__NODE_758_length_58177_cov_13.182754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME283914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538122" accession="ERS11140091">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140091</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538122</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_provirus.171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.5138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_171_length_144310_cov_24.490803_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738269_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_171_length_144310_cov_24.490803_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0355909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538123" accession="ERS11140092">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140092</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538123</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_provirus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.82228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_543_length_70292_cov_4.825721_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738269_bin.113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9655172413793104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_543_length_70292_cov_4.825721_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME216999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA1381;g__CAG-41;s__CAG-41 sp900066215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0329916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538124" accession="ERS11140093">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140093</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538124</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_virus.1226</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_1226_length_40260_cov_14.424682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_1226_length_40260_cov_14.424682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.1226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538125" accession="ERS11140094">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140094</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538125</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_virus.1695</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_1695_length_30746_cov_6.412534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_1695_length_30746_cov_6.412534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_virus.1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538126" accession="ERS11140095">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140095</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538126</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_virus.3608</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20772215993452803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_3608_length_16691_cov_5.998917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738269_bin.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_3608_length_16691_cov_5.998917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_virus.3257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538127" accession="ERS11140096">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140096</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538127</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738269_virus.783</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738269.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.2991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738269) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559654) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_783_length_55055_cov_38.330860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7457627118644068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_14_2337__NODE_783_length_55055_cov_38.330860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538128" accession="ERS11140097">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140097</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538128</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738270_provirus.16</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae provirus assembled from ERR7738270.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.0835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738270) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_16_length_284272_cov_27.940858_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738270_bin.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_16_length_284272_cov_27.940858_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538129" accession="ERS11140098">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140098</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538129</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738270_provirus.593</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738270.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.9872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738270) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_593_length_49091_cov_17.953115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_593_length_49091_cov_17.953115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_virus.1159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538130" accession="ERS11140099">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140099</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538130</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738270_virus.138</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738270.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.8072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738270) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_138_length_102291_cov_37.390622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9913793103448276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_138_length_102291_cov_37.390622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538131" accession="ERS11140100">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140100</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538131</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738270_virus.258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738270.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738270) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_258_length_77019_cov_12.629799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8723404255319149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_258_length_77019_cov_12.629799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_virus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538132" accession="ERS11140101">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140101</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538132</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738270_virus.645</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738270.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.9497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738270) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560494) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_645_length_46848_cov_38.643689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_14_1852__NODE_645_length_46848_cov_38.643689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538133" accession="ERS11140102">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140102</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538133</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_provirus.1037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04730176211453744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.19703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1037_length_48961_cov_4.493065_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1037_length_48961_cov_4.493065_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538134" accession="ERS11140103">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140103</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538134</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_provirus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>143.957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_272_length_102476_cov_84.194133_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738271_bin.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_272_length_102476_cov_84.194133_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.1119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538135" accession="ERS11140104">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140104</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538135</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.10278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03729583153820043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.11448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_10278_length_8422_cov_2.775794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_10278_length_8422_cov_2.775794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.9454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538136" accession="ERS11140105">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140105</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538136</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.1342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1342_length_41777_cov_10.575492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9117647058823528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1342_length_41777_cov_10.575492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745438_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp004553545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.1197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538137" accession="ERS11140106">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140106</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538137</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.1660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.5722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1660_length_36272_cov_24.146761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_1660_length_36272_cov_24.146761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_virus.676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538138" accession="ERS11140107">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140107</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538138</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.2196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.7088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_2196_length_29669_cov_18.620539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_2196_length_29669_cov_18.620539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538139" accession="ERS11140108">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140108</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538139</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.2674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.5367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_2674_length_25508_cov_23.942236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_2674_length_25508_cov_23.942236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__SFDB01;s__SFDB01 sp004558825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746440_virus.1399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538140" accession="ERS11140109">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140109</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538140</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_333_length_92480_cov_7.041395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8305084745762712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_333_length_92480_cov_7.041395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538141" accession="ERS11140110">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140110</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538141</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738271_virus.722</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738271.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>234.846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738271) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_722_length_61458_cov_137.354132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_3_2391__NODE_722_length_61458_cov_137.354132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_virus.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538142" accession="ERS11140111">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140111</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538142</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_provirus.167_2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lactococcus phage bIL309 provirus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_167_length_115316_cov_12.335885_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738272_bin.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_167_length_115316_cov_12.335885_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME206947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Weissella;s__Weissella cibaria</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_provirus.167_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lactococcus phage bIL309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538143" accession="ERS11140112">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140112</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538143</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_provirus.429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.4445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_429_length_66961_cov_40.133395_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9487179487179488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_429_length_66961_cov_40.133395_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Limosilactobacillus;s__Limosilactobacillus mucosae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_provirus.429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538144" accession="ERS11140113">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140113</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538144</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_virus.1181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>286.529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_1181_length_27123_cov_164.369223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_1181_length_27123_cov_164.369223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Sutterella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_virus.1181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538145" accession="ERS11140114">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140114</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538145</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_virus.229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_229_length_98310_cov_14.014466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_229_length_98310_cov_14.014466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900552665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738535_virus.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538146" accession="ERS11140115">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140115</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538146</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_virus.708</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_708_length_46024_cov_62.916338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6041666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_708_length_46024_cov_62.916338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0335721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538147" accession="ERS11140116">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140116</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538147</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_virus.825</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_825_length_40004_cov_11.855962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_825_length_40004_cov_11.855962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_virus.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538148" accession="ERS11140117">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140117</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538148</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738272_virus.936</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738272.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.62546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738272) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567287) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_936_length_35540_cov_5.384119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_11_RAU1009YZ__NODE_936_length_35540_cov_5.384119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME103816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__Dorea_A longicatena</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_virus.936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538149" accession="ERS11140118">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140118</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538149</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_provirus.141</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00183820478765556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.8469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_141_length_186301_cov_25.324083_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_141_length_186301_cov_25.324083_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_provirus.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538150" accession="ERS11140119">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140119</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538150</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_provirus.303</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_303_length_132361_cov_10.840275_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_303_length_132361_cov_10.840275_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_provirus.303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538151" accession="ERS11140120">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140120</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538151</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_provirus.747</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.86303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_747_length_76604_cov_5.636011_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738273_bin.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9069767441860463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_747_length_76604_cov_5.636011_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME121269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__CAG-115 sp003516865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_provirus.747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538152" accession="ERS11140121">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140121</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538152</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.1220</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1220_length_55363_cov_7.343143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1220_length_55363_cov_7.343143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538153" accession="ERS11140122">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140122</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538153</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.1513</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Paenibacillus phage vB_PlaP_API480 virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01373334967392014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1513_length_47881_cov_11.541691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1513_length_47881_cov_11.541691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.1513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Paenibacillus phage vB_PlaP_API480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538154" accession="ERS11140123">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140123</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538154</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.1670</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1670_length_44024_cov_9.053041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1670_length_44024_cov_9.053041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME010079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1865;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.1020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538155" accession="ERS11140124">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140124</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538155</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.1847</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1847_length_40982_cov_11.953893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1847_length_40982_cov_11.953893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.1847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538156" accession="ERS11140125">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140125</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538156</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.1985</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1985_length_39038_cov_10.134878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_1985_length_39038_cov_10.134878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME158618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.1985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538157" accession="ERS11140126">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140126</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538157</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.2217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_2217_length_35989_cov_6.151537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_2217_length_35989_cov_6.151537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__UBA1234;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.1178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538158" accession="ERS11140127">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140127</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538158</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.2547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11475770925110132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_2547_length_31630_cov_6.294996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738273_bin.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_2547_length_31630_cov_6.294996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.1576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538159" accession="ERS11140128">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140128</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538159</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.3352</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_3352_length_24991_cov_7.087220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_3352_length_24991_cov_7.087220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.3352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538160" accession="ERS11140129">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140129</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538160</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.3746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_462_length_102153_cov_32.734237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_462_length_102153_cov_32.734237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538161" accession="ERS11140130">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140130</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538161</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.5332</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.45332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_5332_length_16933_cov_5.131882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_5332_length_16933_cov_5.131882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.5332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538162" accession="ERS11140131">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140131</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538162</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738273_virus.857</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738273.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738273) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561911) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_857_length_69722_cov_8.173035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_17_1054__NODE_857_length_69722_cov_8.173035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_virus.666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538163" accession="ERS11140132">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140132</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538163</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_provirus.1238</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.3363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1238_length_55975_cov_27.485796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1238_length_55975_cov_27.485796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745410_virus.1183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538164" accession="ERS11140133">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140133</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538164</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_provirus.32</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_32_length_320719_cov_9.779446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738274_bin.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9629629629629628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_32_length_320719_cov_9.779446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D sp000434695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538165" accession="ERS11140134">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140134</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538165</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_provirus.786</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_786_length_74369_cov_17.015776_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738274_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_786_length_74369_cov_17.015776_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538166" accession="ERS11140135">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140135</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538166</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.1280</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.4642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1280_length_54712_cov_13.999689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1280_length_54712_cov_13.999689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538167" accession="ERS11140136">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140136</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538167</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.1803</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.37278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1803_length_43234_cov_4.590426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_1803_length_43234_cov_4.590426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745858_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Anaerotignaceae;g__UMGS1670;s__UMGS1670 sp900553995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538168" accession="ERS11140137">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140137</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538168</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.2201</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_2201_length_37381_cov_15.228635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_2201_length_37381_cov_15.228635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.2201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538169" accession="ERS11140138">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140138</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538169</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.3925</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2753052220829365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.40013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_3925_length_23600_cov_5.114399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_3925_length_23600_cov_5.114399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-533;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_provirus.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538170" accession="ERS11140139">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140139</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538170</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.4963</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1193047942582454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_4963_length_19452_cov_13.464694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_4963_length_19452_cov_13.464694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_virus.2571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538171" accession="ERS11140140">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140140</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538171</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738274_virus.89</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738274.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738274) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559904) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_89_length_212908_cov_10.922041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.46296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_19_2488__NODE_89_length_212908_cov_10.922041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME099131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000435075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538172" accession="ERS11140141">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140141</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538172</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_provirus.187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>165.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_187_length_100889_cov_123.109392_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738275_bin.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_187_length_100889_cov_123.109392_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.3646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538173" accession="ERS11140142">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140142</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538173</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_provirus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.6205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_53_length_182372_cov_29.549560_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738275_bin.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_53_length_182372_cov_29.549560_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737953_virus.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538174" accession="ERS11140143">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140143</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538174</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_virus.1338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_1338_length_37033_cov_9.060288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_1338_length_37033_cov_9.060288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738275_virus.1338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538175" accession="ERS11140144">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140144</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538175</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_virus.1988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Acinetobacter phage MD-2021a virus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>96.9919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_1988_length_27921_cov_58.829048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_1988_length_27921_cov_58.829048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738275_virus.1988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Acinetobacter phage MD-2021a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538176" accession="ERS11140145">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140145</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538176</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_virus.3719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_3719_length_18042_cov_7.647147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_3719_length_18042_cov_7.647147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.6060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538177" accession="ERS11140146">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140146</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538177</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_virus.616</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.3703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_616_length_59229_cov_15.317961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738275_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_616_length_59229_cov_15.317961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538178" accession="ERS11140147">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140147</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538178</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738275_virus.884</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738275.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>118.751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738275) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567752) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_884_length_48567_cov_73.579666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_2_2185__NODE_884_length_48567_cov_73.579666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738275_virus.884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538179" accession="ERS11140148">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140148</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538179</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_provirus.1466</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1466_length_46023_cov_7.038850_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1466_length_46023_cov_7.038850_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_provirus.1466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538180" accession="ERS11140149">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140149</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538180</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_provirus.24</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_24_length_345304_cov_13.536299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_24_length_345304_cov_13.536299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Clostridium_Q;s__Clostridium_Q sp003024715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_provirus.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538181" accession="ERS11140150">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140150</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538181</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_provirus.48</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_48_length_285625_cov_7.249328_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5645161290322581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_48_length_285625_cov_7.249328_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538182" accession="ERS11140151">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140151</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538182</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_provirus.909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Hpunavirus provirus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_909_length_66394_cov_25.570563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738276_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7352941176470589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_909_length_66394_cov_25.570563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME266127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Pasteurellaceae;g__Aggregatibacter;s__Aggregatibacter segnis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_provirus.909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Peduovirinae; Hpunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538183" accession="ERS11140152">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140152</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538183</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_virus.1403</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1403_length_47688_cov_8.115457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6944444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1403_length_47688_cov_8.115457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538184" accession="ERS11140153">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140153</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538184</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_virus.1589</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1589_length_42982_cov_8.022795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1589_length_42982_cov_8.022795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_virus.1589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538185" accession="ERS11140154">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140154</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538185</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_virus.1824</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04966960352422907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1824_length_37996_cov_14.474010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_1824_length_37996_cov_14.474010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538186" accession="ERS11140155">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140155</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538186</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_virus.2425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.714977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_2425_length_29786_cov_6.288195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_2425_length_29786_cov_6.288195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_virus.2425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538187" accession="ERS11140156">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140156</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538187</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738276_virus.343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738276.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.9956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738276) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558871) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_343_length_115602_cov_36.134153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5405405405405406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_5_1018__NODE_343_length_115602_cov_36.134153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_virus.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538188" accession="ERS11140157">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140157</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538188</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_provirus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_145_length_104708_cov_5.794287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738277_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_145_length_104708_cov_5.794287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_provirus.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538189" accession="ERS11140158">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140158</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538189</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_provirus.37</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05195538818076477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>229.261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_37_length_204844_cov_129.782607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738277_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_37_length_204844_cov_129.782607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538190" accession="ERS11140159">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140159</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538190</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.1111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Aeromonas phage phiARM81mr virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>437.719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_1111_length_31939_cov_264.447806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_1111_length_31939_cov_264.447806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.1111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Aeromonas phage phiARM81mr</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538191" accession="ERS11140160">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140160</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538191</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.1719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14936728245389252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.9292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_1719_length_22595_cov_4.164357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_1719_length_22595_cov_4.164357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0226686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538192" accession="ERS11140161">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140161</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538192</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.252</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Maenadvirus virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>376.259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_252_length_79561_cov_210.698795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8018867924528302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_252_length_79561_cov_210.698795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738260_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactiplantibacillus;s__Lactiplantibacillus plantarum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Tybeckvirinae; Maenadvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538193" accession="ERS11140162">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140162</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538193</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.508</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.4374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_508_length_52122_cov_22.189355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_508_length_52122_cov_22.189355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538194" accession="ERS11140163">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140163</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538194</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.657</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>365.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_657_length_45107_cov_205.585165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_657_length_45107_cov_205.585165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738551_virus.440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538195" accession="ERS11140164">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140164</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538195</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.780</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.1137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_780_length_40759_cov_49.122782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_780_length_40759_cov_49.122782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746779_virus.1100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538196" accession="ERS11140165">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140165</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538196</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738277_virus.910</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738277.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.0084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738277) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567191) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_910_length_36631_cov_26.219976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_19_CHE1008TZ__NODE_910_length_36631_cov_26.219976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538197" accession="ERS11140166">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140166</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538197</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_provirus.1122</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1122_length_50039_cov_13.054121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1122_length_50039_cov_13.054121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_provirus.1122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538198" accession="ERS11140167">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140167</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538198</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_provirus.207</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_207_length_129718_cov_12.838060_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738278_bin.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.951219512195122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_207_length_129718_cov_12.838060_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS363;s__UMGS363 sp900543105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_provirus.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538199" accession="ERS11140168">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140168</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538199</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_provirus.952</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_952_length_55912_cov_21.748384_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_952_length_55912_cov_21.748384_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538200" accession="ERS11140169">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140169</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538200</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.1286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1286_length_45149_cov_9.046836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1286_length_45149_cov_9.046836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_provirus.795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538201" accession="ERS11140170">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140170</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538201</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.1435</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.4038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1435_length_41547_cov_22.607982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7837837837837838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1435_length_41547_cov_22.607982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.1435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538202" accession="ERS11140171">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140171</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538202</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.1659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.5592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1659_length_36923_cov_34.064186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6129032258064516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1659_length_36923_cov_34.064186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.1659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538203" accession="ERS11140172">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140172</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538203</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.1989</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3004947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.6475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1989_length_31737_cov_43.833607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_1989_length_31737_cov_43.833607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.1989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538204" accession="ERS11140173">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140173</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538204</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.3123</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_3123_length_21790_cov_7.740616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_3123_length_21790_cov_7.740616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.5176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538205" accession="ERS11140174">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140174</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538205</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.3888</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_3888_length_18248_cov_11.715646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_3888_length_18248_cov_11.715646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.3888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538206" accession="ERS11140175">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140175</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538206</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.6231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08312057094440387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.1321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_6231_length_12335_cov_23.394940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_6231_length_12335_cov_23.394940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp900552845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.6231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538207" accession="ERS11140176">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140176</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538207</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738278_virus.864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738278.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>215.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738278) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562243) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_864_length_59043_cov_129.578418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9210526315789472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_3_1264__NODE_864_length_59043_cov_129.578418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538208" accession="ERS11140177">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140177</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538208</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738352_provirus.279</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738352.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12637530562347188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738352) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_279_length_80314_cov_9.787392_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738352_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_279_length_80314_cov_9.787392_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738352_provirus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538209" accession="ERS11140178">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140178</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538209</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738352_provirus.816</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738352.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.67896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738352) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_816_length_38466_cov_4.808096_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738352_bin.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_816_length_38466_cov_4.808096_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738564_provirus.163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538210" accession="ERS11140179">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140179</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538210</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738352_virus.433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738352.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738352) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_433_length_62305_cov_6.677059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8085106382978723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_433_length_62305_cov_6.677059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738352_virus.433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538211" accession="ERS11140180">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140180</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538211</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738352_virus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738352.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.80048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738352) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_685_length_44086_cov_4.248290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_685_length_44086_cov_4.248290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0352447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538212" accession="ERS11140181">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140181</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538212</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738352_virus.901</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738352.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05214757709251101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738352) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_901_length_35971_cov_7.340698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_9_RAU0028AZ__NODE_901_length_35971_cov_7.340698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900552665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738352_virus.901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538213" accession="ERS11140182">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140182</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538213</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_provirus.236</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>223.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_236_length_103411_cov_123.186647_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_236_length_103411_cov_123.186647_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_virus.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538214" accession="ERS11140183">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140183</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538214</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_provirus.419</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>118.951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_419_length_76536_cov_68.419310_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_419_length_76536_cov_68.419310_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538215" accession="ERS11140184">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140184</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538215</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_provirus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16857916465189376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_99_length_149282_cov_9.911732_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_99_length_149282_cov_9.911732_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_provirus.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538216" accession="ERS11140185">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140185</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538216</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.1179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.57827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1179_length_39874_cov_5.200266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1179_length_39874_cov_5.200266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.1179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538217" accession="ERS11140186">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140186</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538217</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_132_length_135825_cov_8.687119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_132_length_135825_cov_8.687119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538218" accession="ERS11140187">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140187</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538218</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.1547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.4814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1547_length_33095_cov_54.213066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1547_length_33095_cov_54.213066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus sp900066115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.1547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538219" accession="ERS11140188">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140188</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538219</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.1912</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.3999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1912_length_28509_cov_15.331422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_1912_length_28509_cov_15.331422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538220" accession="ERS11140189">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140189</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538220</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.2379</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_2379_length_24200_cov_16.838246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_2379_length_24200_cov_16.838246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.2379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538221" accession="ERS11140190">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140190</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538221</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.3223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0322136563876652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>288.676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_3223_length_19090_cov_161.691053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_3223_length_19090_cov_161.691053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_provirus.2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538222" accession="ERS11140191">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140191</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538222</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.51</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.5476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_51_length_190658_cov_18.777968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5128205128205128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_51_length_190658_cov_18.777968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538223" accession="ERS11140192">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140192</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538223</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.691</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Enterobacteria phage mEp021 virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_691_length_55804_cov_6.672493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8313253012048193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_691_length_55804_cov_6.672493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Enterobacteria phage mEp021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538224" accession="ERS11140193">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140193</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538224</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738353_virus.896</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738353.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738353) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567219) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_896_length_47946_cov_9.032923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738353_bin.165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6060606060606061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_15_CHE1003TZ__NODE_896_length_47946_cov_9.032923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538225" accession="ERS11140194">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140194</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538225</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738426_provirus.205</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.8417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567233) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_205_length_103756_cov_44.951851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_205_length_103756_cov_44.951851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738426_provirus.205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538226" accession="ERS11140195">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140195</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538226</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738426_provirus.610</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567233) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_610_length_55229_cov_9.832427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_610_length_55229_cov_9.832427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738426_provirus.610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538227" accession="ERS11140196">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140196</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538227</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738426_virus.1070</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567233) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_1070_length_35620_cov_9.855555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738426_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_1070_length_35620_cov_9.855555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio;s__Desulfovibrio piger</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738426_virus.1070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538228" accession="ERS11140197">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140197</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538228</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738426_virus.254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.0426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567233) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_254_length_93872_cov_15.641836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_254_length_93872_cov_15.641836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides uniformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738426_virus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538229" accession="ERS11140198">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140198</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538229</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738426_virus.728</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0255538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567233) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_728_length_48719_cov_13.266642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738426_bin.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_21_RAJ1012YZ__NODE_728_length_48719_cov_13.266642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio;s__Desulfovibrio piger</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738426_virus.728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538230" accession="ERS11140199">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140199</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538230</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_provirus.1714</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan351 provirus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02973568281938326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.83723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_1714_length_24645_cov_5.023893_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738433_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_1714_length_24645_cov_5.023893_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_provirus.2938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538231" accession="ERS11140200">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140200</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538231</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_provirus.642</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_642_length_49065_cov_6.520046_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738433_bin.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_642_length_49065_cov_6.520046_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_provirus.642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538232" accession="ERS11140201">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140201</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538232</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_virus.1184</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>221.241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_1184_length_33233_cov_128.725600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_1184_length_33233_cov_128.725600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus sp900066115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_virus.1184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538233" accession="ERS11140202">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140202</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538233</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_virus.223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_223_length_98796_cov_6.503763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5154639175257731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_223_length_98796_cov_6.503763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538234" accession="ERS11140203">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140203</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538234</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_virus.404</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_404_length_66322_cov_18.678466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_404_length_66322_cov_18.678466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Intestinibacter;s__Intestinibacter bartlettii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538235" accession="ERS11140204">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140204</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538235</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_virus.7967</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.9769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_7967_length_7206_cov_3.956095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_7967_length_7206_cov_3.956095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_virus.7967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538236" accession="ERS11140205">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140205</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538236</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738433_virus.978</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567237) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_978_length_38007_cov_13.115028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5757575757575758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_11_CHE0013BZ__NODE_978_length_38007_cov_13.115028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_virus.978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538237" accession="ERS11140206">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140206</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538237</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_provirus.1309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>191.645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1309_length_50395_cov_126.431357_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1309_length_50395_cov_126.431357_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.2604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538238" accession="ERS11140207">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140207</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538238</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_provirus.3592</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gokushovirus WZ-2015a provirus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>289.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_3592_length_22717_cov_171.620716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738447_bin.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_3592_length_22717_cov_171.620716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_provirus.3592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; unclassified Gokushovirinae; Gokushovirus WZ-2015a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538239" accession="ERS11140208">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140208</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538239</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_provirus.636</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1426160506300154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_636_length_80148_cov_9.086636_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738447_bin.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_636_length_80148_cov_9.086636_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_provirus.636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538240" accession="ERS11140209">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140209</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538240</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.107</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.9976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_107_length_179566_cov_21.896551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.984126984126984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_107_length_179566_cov_21.896551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538241" accession="ERS11140210">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140210</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538241</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.1425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1425_length_47822_cov_13.184606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1425_length_47822_cov_13.184606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538242" accession="ERS11140211">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140211</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538242</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.1608</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>950.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1608_length_43612_cov_529.220489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1608_length_43612_cov_529.220489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745823_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__SFJ001;s__SFJ001 sp004555865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538243" accession="ERS11140212">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140212</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538243</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.1968</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.8881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1968_length_37805_cov_27.161631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_1968_length_37805_cov_27.161631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.1968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538244" accession="ERS11140213">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140213</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538244</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.2244</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2244_length_34006_cov_5.515400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2244_length_34006_cov_5.515400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538245" accession="ERS11140214">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140214</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538245</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.2405</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.1526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2405_length_31913_cov_26.008292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2405_length_31913_cov_26.008292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900547315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_provirus.308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538246" accession="ERS11140215">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140215</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538246</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.2902</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.0306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2902_length_27239_cov_4.102496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_2902_length_27239_cov_4.102496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.2902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538247" accession="ERS11140216">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140216</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538247</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.3740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1673458149779736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.1745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_3740_length_22035_cov_3.626696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738447_bin.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_3740_length_22035_cov_3.626696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746094_bin.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__UMGS1810;f__UMGS1810;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.3740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538248" accession="ERS11140217">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140217</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538248</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.4713</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.0507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_4713_length_18143_cov_21.441769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_4713_length_18143_cov_21.441769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.4713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538249" accession="ERS11140218">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140218</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538249</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738447_virus.648</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phage DP SC_6_H4_2017 virus assembled from ERR7738447.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>131.747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738447) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561837) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_648_length_79039_cov_75.559193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_22_1120__NODE_648_length_79039_cov_75.559193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Phage DP SC_6_H4_2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538250" accession="ERS11140219">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140219</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538250</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738451_provirus.118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738451.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1218.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738451) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567433) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_118_length_131066_cov_749.239318_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38235294117647056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_118_length_131066_cov_749.239318_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738645_virus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538251" accession="ERS11140220">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140220</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538251</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738451_provirus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738451.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738451) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567433) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_543_length_55040_cov_6.497207_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_543_length_55040_cov_6.497207_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738451_provirus.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538252" accession="ERS11140221">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140221</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538252</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738451_virus.1643</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738451.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07897769092469892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.69115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738451) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567433) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_1643_length_22653_cov_4.122962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_1643_length_22653_cov_4.122962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0302247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538253" accession="ERS11140222">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140222</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538253</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738451_virus.433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738451.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>157.782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738451) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567433) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_433_length_62624_cov_88.704734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_433_length_62624_cov_88.704734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0370616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538254" accession="ERS11140223">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140223</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538254</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738451_virus.90</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738451.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738451) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567433) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_90_length_151784_cov_11.820252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5423728813559322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_23_THA0068JZ__NODE_90_length_151784_cov_11.820252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738530_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium sp900539945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738451_virus.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538255" accession="ERS11140224">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140224</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538255</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738460_provirus.1630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738460.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0293080874004698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.07865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738460) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567617) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_1630_length_44043_cov_4.444298_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_1630_length_44043_cov_4.444298_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738460_provirus.1630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538256" accession="ERS11140225">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140225</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538256</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738460_virus.1484</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7738460.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738460) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567617) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_1484_length_46840_cov_6.180142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738460_bin.178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_1484_length_46840_cov_6.180142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738460_virus.1484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538257" accession="ERS11140226">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140226</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538257</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738460_virus.5433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Decurrovirus virus assembled from ERR7738460.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.5024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738460) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567617) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_5433_length_17545_cov_43.745134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_17_RAJ1013YZ__NODE_5433_length_17545_cov_43.745134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738460_virus.5433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Decurrovirus; unclassified Decurrovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538258" accession="ERS11140227">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140227</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538258</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_provirus.129</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage provirus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_129_length_136576_cov_12.571286_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738461_bin.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_129_length_136576_cov_12.571286_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_virus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538259" accession="ERS11140228">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140228</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538259</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_provirus.412</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_412_length_86458_cov_9.542017_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>42.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_412_length_86458_cov_9.542017_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538260" accession="ERS11140229">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140229</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538260</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_provirus.8137</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. provirus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_8137_length_11169_cov_6.960512_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738461_bin.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_8137_length_11169_cov_6.960512_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_provirus.8137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538261" accession="ERS11140230">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140230</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538261</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.1215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1215_length_45047_cov_7.248677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5128205128205128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1215_length_45047_cov_7.248677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538262" accession="ERS11140231">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140231</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538262</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.1686</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseobacter phage CRP-7 virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6873898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>169.947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1686_length_36751_cov_101.730190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1686_length_36751_cov_101.730190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Roseobacter phage CRP-7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538263" accession="ERS11140232">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140232</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538263</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.1970</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.0179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1970_length_33167_cov_22.796525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738461_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_1970_length_33167_cov_22.796525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196 sp900553895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.3054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538264" accession="ERS11140233">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140233</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538264</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.2362</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.6913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_2362_length_29277_cov_25.482774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_2362_length_29277_cov_25.482774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538265" accession="ERS11140234">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140234</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538265</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.2809</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_2809_length_25881_cov_12.499612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738461_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_2809_length_25881_cov_12.499612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745918_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538266" accession="ERS11140235">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140235</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538266</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.4179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06497797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_4179_length_18979_cov_6.465348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_4179_length_18979_cov_6.465348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538267" accession="ERS11140236">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140236</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538267</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738461_virus.815</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738461.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>855.701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738461) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567674) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_815_length_57720_cov_513.699166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_K_2_2358__NODE_815_length_57720_cov_513.699166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738267_virus.372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538268" accession="ERS11140237">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140237</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538268</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_provirus.1385</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.0492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1385_length_46326_cov_16.379165_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1385_length_46326_cov_16.379165_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_provirus.1385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538269" accession="ERS11140238">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140238</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538269</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_provirus.506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20049472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_506_length_94749_cov_14.496683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738472_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_506_length_94749_cov_14.496683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0258741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538270" accession="ERS11140239">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140239</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538270</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_provirus.980</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_980_length_60848_cov_14.288049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.902439024390244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_980_length_60848_cov_14.288049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_provirus.980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538271" accession="ERS11140240">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140240</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538271</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.1156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14558091782530816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.2766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1156_length_53469_cov_54.233462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1156_length_53469_cov_54.233462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.1156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538272" accession="ERS11140241">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140241</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538272</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.1370</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Psavirus virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.9619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1370_length_46794_cov_31.246441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1370_length_46794_cov_31.246441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.1370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Psavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538273" accession="ERS11140242">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140242</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538273</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.1502</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.8196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1502_length_42877_cov_5.674790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8378378378378378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1502_length_42877_cov_5.674790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.1502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538274" accession="ERS11140243">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140243</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538274</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.1663</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>448.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1663_length_39624_cov_259.747794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43478260869565216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1663_length_39624_cov_259.747794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.1663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538275" accession="ERS11140244">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140244</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538275</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.1808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1808_length_36820_cov_13.323844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_1808_length_36820_cov_13.323844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.1808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538276" accession="ERS11140245">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140245</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538276</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.2108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08869765791341377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.7231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2108_length_32522_cov_20.910402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2108_length_32522_cov_20.910402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538277" accession="ERS11140246">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140246</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538277</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.2432</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2432_length_28340_cov_17.529455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2432_length_28340_cov_17.529455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.2432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538278" accession="ERS11140247">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140247</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538278</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.2864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Ceridwen virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2864_length_24503_cov_14.387251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_2864_length_24503_cov_14.387251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.2864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Butyrivibrio virus Ceridwen</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538279" accession="ERS11140248">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140248</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538279</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.3946</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.14918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_3946_length_18055_cov_5.163867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_3946_length_18055_cov_5.163867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.3946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538280" accession="ERS11140249">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140249</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538280</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738472_virus.975</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738472.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2955.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738472) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_975_length_60974_cov_1703.058739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_15_RAU1013YZ__NODE_975_length_60974_cov_1703.058739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538281" accession="ERS11140250">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140250</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538281</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_provirus.256</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_256_length_151653_cov_7.451503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738493_bin.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_256_length_151653_cov_7.451503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Lentisphaeria;o__Victivallales;f__UBA1829;g__UBA1829;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_provirus.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538282" accession="ERS11140251">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140251</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538282</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_provirus.675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>205.632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_675_length_85641_cov_125.196333_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738493_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_675_length_85641_cov_125.196333_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME232472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RFN20;f__CAG-288;g__CAG-568;s__CAG-568 sp000434395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_provirus.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538283" accession="ERS11140252">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140252</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538283</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_135_length_204886_cov_66.586766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_135_length_204886_cov_66.586766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_virus.135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538284" accession="ERS11140253">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140253</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538284</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.1574</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1574_length_43481_cov_25.633144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1574_length_43481_cov_25.633144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.1371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538285" accession="ERS11140254">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140254</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538285</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.1763</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1763_length_39034_cov_19.909131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1763_length_39034_cov_19.909131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538286" accession="ERS11140255">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140255</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538286</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.1980</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3799559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.2952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1980_length_35123_cov_35.591936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_1980_length_35123_cov_35.591936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_provirus.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538287" accession="ERS11140256">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140256</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538287</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.2579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.98897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_2579_length_27176_cov_5.228459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_2579_length_27176_cov_5.228459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0281755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538288" accession="ERS11140257">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140257</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538288</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.4085</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>297.032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_4085_length_17039_cov_178.744311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_4085_length_17039_cov_178.744311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538289" accession="ERS11140258">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140258</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538289</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738493_virus.532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738493.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738493) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567297) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_532_length_101473_cov_32.843367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_15_RAU0016YZ__NODE_532_length_101473_cov_32.843367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738551_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900756925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_virus.532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538290" accession="ERS11140259">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140259</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538290</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_provirus.1064</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04054736765072696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.0238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_1064_length_39477_cov_35.087335_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738513_bin.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_1064_length_39477_cov_35.087335_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira eligens_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738513_provirus.1064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538291" accession="ERS11140260">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140260</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538291</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_provirus.353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan351 provirus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04655891240983966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.9881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_353_length_96217_cov_17.145070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738513_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_353_length_96217_cov_17.145070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__UMGS1375;s__UMGS1375 sp900066615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738513_provirus.353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538292" accession="ERS11140261">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140261</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538292</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_provirus.669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.8075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_669_length_60162_cov_15.170192_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.926829268292683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_669_length_60162_cov_15.170192_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_provirus.980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538293" accession="ERS11140262">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140262</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538293</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_virus.1199</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_1199_length_35204_cov_7.083241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45714285714285713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_1199_length_35204_cov_7.083241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738513_virus.1199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538294" accession="ERS11140263">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140263</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538294</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_virus.2034</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.36324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_2034_length_20913_cov_4.350451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_2034_length_20913_cov_4.350451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME232880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Fusobacteriota;c__Fusobacteriia;o__Fusobacteriales;f__Fusobacteriaceae;g__Fusobacterium_A;s__Fusobacterium_A mortiferum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738513_virus.2034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538295" accession="ERS11140264">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140264</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538295</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738513_virus.858</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738513.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738513) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567487) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_858_length_48477_cov_6.222500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_13_THA1070JZ__NODE_858_length_48477_cov_6.222500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738513_virus.858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538296" accession="ERS11140265">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140265</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538296</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_provirus.1522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Akkermansia phage DTMo-2021a provirus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11299472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.7722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1522_length_55695_cov_50.905498_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738514_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1522_length_55695_cov_50.905498_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Verrucomicrobiae;o__Verrucomicrobiales;f__Akkermansiaceae;g__Akkermansia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738514_provirus.1522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Akkermansia phage DTMo-2021a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538297" accession="ERS11140266">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140266</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538297</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_provirus.3151</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.9323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_3151_length_29375_cov_27.382245_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738514_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_3151_length_29375_cov_27.382245_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME121515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-239;g__51-20;s__51-20 sp001917175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0244827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538298" accession="ERS11140267">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140267</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538298</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_provirus.585</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe provirus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_585_length_108532_cov_32.698852_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_585_length_108532_cov_32.698852_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538299" accession="ERS11140268">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140268</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538299</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_provirus.932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.83146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_932_length_78469_cov_5.478965_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738514_bin.171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7560975609756098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_932_length_78469_cov_5.478965_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738514_provirus.932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538300" accession="ERS11140269">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140269</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538300</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.1555</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>982.335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1555_length_54700_cov_552.940135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1555_length_54700_cov_552.940135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0370200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538301" accession="ERS11140270">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140270</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538301</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.1960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1960_length_45366_cov_22.213518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43478260869565216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_1960_length_45366_cov_22.213518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0330522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538302" accession="ERS11140271">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140271</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538302</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.2184</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_2184_length_41490_cov_11.844131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_2184_length_41490_cov_11.844131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_virus.1799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538303" accession="ERS11140272">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140272</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538303</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.2898</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_2898_length_31785_cov_6.252618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_2898_length_31785_cov_6.252618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0338577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538304" accession="ERS11140273">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140273</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538304</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.356</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1748898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.2081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_356_length_144896_cov_34.042916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_356_length_144896_cov_34.042916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0375643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538305" accession="ERS11140274">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140274</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538305</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738514_virus.72</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738514.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.8048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738514) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567069) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_72_length_293869_cov_37.211306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_C_23_7024__NODE_72_length_293869_cov_37.211306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538306" accession="ERS11140275">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140275</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538306</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738515_provirus.2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738515.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.0287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738515) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567081) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_2_length_686296_cov_22.377683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738515_bin.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_2_length_686296_cov_22.377683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738515_provirus.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538307" accession="ERS11140276">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140276</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538307</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738515_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738515.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.0327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738515) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567081) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_60_length_203813_cov_29.903797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738515_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_60_length_203813_cov_29.903797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0298777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538308" accession="ERS11140277">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140277</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538308</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738515_virus.1562</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738515.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07381662166094104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.16515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738515) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567081) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_1562_length_22330_cov_5.229003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_1562_length_22330_cov_5.229003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738515_virus.1562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538309" accession="ERS11140278">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140278</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538309</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738515_virus.2432</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738515.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17820990407225873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>118.597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738515) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567081) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_2432_length_13181_cov_69.728098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_2432_length_13181_cov_69.728098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738515_virus.2432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538310" accession="ERS11140279">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140279</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538310</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738515_virus.927</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738515.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738515) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567081) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_927_length_40172_cov_6.370271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6511627906976745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_11_CHE0005SZ__NODE_927_length_40172_cov_6.370271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738515_virus.927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538311" accession="ERS11140280">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140280</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538311</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_provirus.20</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>201.062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_20_length_223125_cov_142.938448_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_20_length_223125_cov_142.938448_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_provirus.178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538312" accession="ERS11140281">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140281</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538312</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_provirus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>390.845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_685_length_51687_cov_222.612711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_685_length_51687_cov_222.612711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538313" accession="ERS11140282">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140282</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538313</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_virus.1111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.4824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_1111_length_38808_cov_20.235419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_1111_length_38808_cov_20.235419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738527_virus.1111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538314" accession="ERS11140283">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140283</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538314</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_virus.1412</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>644.333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_1412_length_32870_cov_369.054890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_1412_length_32870_cov_369.054890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.1563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538315" accession="ERS11140284">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140284</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538315</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_virus.23</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_23_length_219699_cov_8.439036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9347826086956522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_23_length_219699_cov_8.439036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900552665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738527_virus.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538316" accession="ERS11140285">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140285</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538316</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_virus.453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.9802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_453_length_64984_cov_13.512672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5116279069767442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_453_length_64984_cov_13.512672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538317" accession="ERS11140286">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140286</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538317</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738527_virus.851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738527.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738527) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559349) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_851_length_45846_cov_7.707378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_10_2622__NODE_851_length_45846_cov_7.707378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_virus.1405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538318" accession="ERS11140287">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140287</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538318</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_provirus.1189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.7417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1189_length_52138_cov_36.589174_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1189_length_52138_cov_36.589174_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538319" accession="ERS11140288">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140288</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538319</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_provirus.172</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>285.002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_172_length_158038_cov_170.620191_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738529_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_172_length_158038_cov_170.620191_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737972_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__RUG115;s__RUG115 sp900066395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_provirus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538320" accession="ERS11140289">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140289</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538320</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_provirus.342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>199.916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_342_length_115702_cov_124.209505_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738529_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_342_length_115702_cov_124.209505_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538321" accession="ERS11140290">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140290</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538321</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_provirus.736</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10049472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_736_length_74410_cov_6.277199_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_736_length_74410_cov_6.277199_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538322" accession="ERS11140291">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140291</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538322</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_virus.1133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1133_length_53864_cov_7.421124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1133_length_53864_cov_7.421124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538323" accession="ERS11140292">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140292</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538323</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_virus.1575</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.8302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1575_length_41105_cov_29.375012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1575_length_41105_cov_29.375012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_virus.1575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538324" accession="ERS11140293">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140293</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538324</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_virus.1959</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.8984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1959_length_34250_cov_20.525883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_1959_length_34250_cov_20.525883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_virus.1959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538325" accession="ERS11140294">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140294</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538325</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_virus.2575</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_2575_length_26430_cov_12.349865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_2575_length_26430_cov_12.349865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538326" accession="ERS11140295">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140295</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538326</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738529_virus.452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738529.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738529) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_452_length_100217_cov_8.024665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2608695652173913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_5_RAU1020YZ__NODE_452_length_100217_cov_8.024665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_virus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538327" accession="ERS11140296">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140296</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538327</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738530_provirus.13</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7738530.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9144553881807648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>387.658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738530) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560656) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_13_length_278863_cov_237.909041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738530_bin.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_13_length_278863_cov_237.909041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738530_provirus.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538328" accession="ERS11140297">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140297</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538328</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738530_provirus.4</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738530.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>124.896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738530) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560656) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_4_length_494412_cov_78.509873_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738530_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_4_length_494412_cov_78.509873_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738530_provirus.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538329" accession="ERS11140298">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140298</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538329</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738530_virus.376</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738530.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.2447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738530) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560656) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_376_length_65402_cov_4.060390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_376_length_65402_cov_4.060390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738530_virus.376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538330" accession="ERS11140299">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140299</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538330</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738530_virus.980</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738530.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.8886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738530) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560656) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_980_length_30771_cov_22.693686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_13_1725__NODE_980_length_30771_cov_22.693686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME089567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus sp001556435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.2344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538331" accession="ERS11140300">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140300</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538331</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_provirus.1680</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17563694053537307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1680_length_39854_cov_7.573522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738531_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1680_length_39854_cov_7.573522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_provirus.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538332" accession="ERS11140301">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140301</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538332</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_provirus.3213</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10658093884155775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_3213_length_27586_cov_20.832637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738531_bin.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_3213_length_27586_cov_20.832637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_provirus.3213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538333" accession="ERS11140302">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140302</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538333</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_provirus.544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_544_length_71578_cov_11.847261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738531_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_544_length_71578_cov_11.847261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746739_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538334" accession="ERS11140303">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140303</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538334</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_provirus.97</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07995594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_97_length_155930_cov_11.541549_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738531_bin.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_97_length_155930_cov_11.541549_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0323692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538335" accession="ERS11140304">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140304</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538335</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.1361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:55Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1361_length_44759_cov_26.588000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738531_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9655172413793104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1361_length_44759_cov_26.588000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.1549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538336" accession="ERS11140305">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140305</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538336</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.1679</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.8498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1679_length_39854_cov_35.175906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_1679_length_39854_cov_35.175906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.1477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538337" accession="ERS11140306">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140306</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538337</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.2045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_2045_length_35835_cov_6.184266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_2045_length_35835_cov_6.184266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538338" accession="ERS11140307">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140307</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538338</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.2623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_2623_length_30982_cov_6.952823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_2623_length_30982_cov_6.952823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.1289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538339" accession="ERS11140308">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140308</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538339</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.349</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.7603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_349_length_88584_cov_25.814670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.41379310344827586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_349_length_88584_cov_25.814670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_virus.382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538340" accession="ERS11140309">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140309</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538340</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.4267</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.9528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_4267_length_23051_cov_4.795682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_4267_length_23051_cov_4.795682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738273_bin.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_F;s__Ruminococcus_F champanellensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745582_virus.1597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538341" accession="ERS11140310">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140310</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538341</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.5614</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_5614_length_19077_cov_7.581947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_5614_length_19077_cov_7.581947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.1639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538342" accession="ERS11140311">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140311</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538342</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738531_virus.960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738531.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738531) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_960_length_53832_cov_11.123003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_10_1530__NODE_960_length_53832_cov_11.123003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_virus.960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538343" accession="ERS11140312">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140312</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538343</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_provirus.255</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_255_length_104894_cov_45.798716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5161290322580645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_255_length_104894_cov_45.798716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_provirus.255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538344" accession="ERS11140313">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140313</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538344</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_provirus.820</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20847882985254676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.39895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_820_length_56786_cov_5.511153_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_820_length_56786_cov_5.511153_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538345" accession="ERS11140314">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140314</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538345</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.1240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.7808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1240_length_44281_cov_50.348317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1240_length_44281_cov_50.348317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.1240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538346" accession="ERS11140315">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140315</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538346</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.1369</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.3575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1369_length_42373_cov_48.667936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1369_length_42373_cov_48.667936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746398_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;s__Oribacterium sp900772695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538347" accession="ERS11140316">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140316</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538347</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.1577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1577_length_38751_cov_9.567410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1577_length_38751_cov_9.567410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0339015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538348" accession="ERS11140317">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140317</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538348</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.1811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1811_length_35847_cov_12.310092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_1811_length_35847_cov_12.310092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.1811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538349" accession="ERS11140318">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140318</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538349</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.2034</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>175.473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_2034_length_32962_cov_106.362171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_2034_length_32962_cov_106.362171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;s__CAG-302 sp001916775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.2034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538350" accession="ERS11140319">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140319</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538350</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.2455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.8736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_2455_length_28943_cov_21.210421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_2455_length_28943_cov_21.210421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538351" accession="ERS11140320">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140320</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538351</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.3054</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_3054_length_24562_cov_6.655993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_3054_length_24562_cov_6.655993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R sp900542875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.3054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538352" accession="ERS11140321">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140321</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538352</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.403</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_403_length_81558_cov_9.782563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738532_bin.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_403_length_81558_cov_9.782563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538353" accession="ERS11140322">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140322</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538353</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.8139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_467_length_75590_cov_19.452598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_467_length_75590_cov_19.452598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538354" accession="ERS11140323">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140323</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538354</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.63</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.012200000000007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_63_length_197736_cov_79.432841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_63_length_197736_cov_79.432841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538355" accession="ERS11140324">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140324</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538355</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738532_virus.8805</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738532.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08284611230643936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.7801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738532) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560754) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_8805_length_10500_cov_3.812818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_3_1732__NODE_8805_length_10500_cov_3.812818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.8805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538356" accession="ERS11140325">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140325</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538356</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_provirus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>224.971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_161_length_116745_cov_132.680992_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738533_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_161_length_116745_cov_132.680992_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.1094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538357" accession="ERS11140326">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140326</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538357</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_provirus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_275_length_87636_cov_7.805423_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738533_bin.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6739130434782609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_275_length_87636_cov_7.805423_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_provirus.582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538358" accession="ERS11140327">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140327</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538358</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_provirus.727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_727_length_48504_cov_7.135420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6744186046511628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_727_length_48504_cov_7.135420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0362748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538359" accession="ERS11140328">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140328</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538359</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_virus.206</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>615.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_206_length_103487_cov_360.607253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_206_length_103487_cov_360.607253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738533_virus.206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538360" accession="ERS11140329">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140329</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538360</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_virus.747</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>936.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_747_length_47756_cov_631.610646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_747_length_47756_cov_631.610646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738533_virus.747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538361" accession="ERS11140330">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140330</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538361</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738533_virus.956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738533.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738533) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_956_length_39684_cov_8.413235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_13_RAU1011YZ__NODE_956_length_39684_cov_8.413235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738533_virus.956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538362" accession="ERS11140331">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140331</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538362</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_provirus.179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12339347995716636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_179_length_122435_cov_12.194348_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_179_length_122435_cov_12.194348_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745388_virus.661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538363" accession="ERS11140332">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140332</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538363</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_provirus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.927477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.8985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_53_length_213706_cov_16.614743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738534_bin.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9428571428571428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_53_length_213706_cov_16.614743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_virus.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538364" accession="ERS11140333">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140333</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538364</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.1065</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.5125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_1065_length_46811_cov_31.693671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_1065_length_46811_cov_31.693671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.1065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538365" accession="ERS11140334">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140334</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538365</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.13486</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Inovirus sp. virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.94587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_13486_length_6283_cov_5.111988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_13486_length_6283_cov_5.111988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745431_virus.9041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; Inovirus; unclassified Inovirus; Inovirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538366" accession="ERS11140335">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140335</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538366</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.1651</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.98818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_1651_length_35083_cov_4.307490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7931034482758621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_1651_length_35083_cov_4.307490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0272411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538367" accession="ERS11140336">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140336</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538367</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3812.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_239_length_104399_cov_2150.153745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5161290322580645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_239_length_104399_cov_2150.153745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538368" accession="ERS11140337">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140337</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538368</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.303</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>483.598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_303_length_94037_cov_276.710260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7317073170731707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_303_length_94037_cov_276.710260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538369" accession="ERS11140338">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140338</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538369</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.5038</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.4595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_5038_length_14510_cov_15.629876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_5038_length_14510_cov_15.629876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.6384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538370" accession="ERS11140339">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140339</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538370</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738534_virus.952</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738534.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738534) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561146) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_952_length_50310_cov_15.179782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738534_bin.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_3_1599__NODE_952_length_50310_cov_15.179782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738227_virus.620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538371" accession="ERS11140340">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140340</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538371</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>149.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_22_length_355978_cov_87.787882_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738535_bin.103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_22_length_355978_cov_87.787882_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738155_virus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538372" accession="ERS11140341">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140341</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538372</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_provirus.417</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_417_length_99601_cov_12.156565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738535_bin.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_417_length_99601_cov_12.156565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Bilophila;s__Bilophila wadsworthia</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738535_provirus.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538373" accession="ERS11140342">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140342</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538373</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_provirus.8</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.2529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_8_length_460434_cov_24.912470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_8_length_460434_cov_24.912470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-127;s__CAG-127 sp900319515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0351633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538374" accession="ERS11140343">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140343</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538374</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_virus.1006</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>145.357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_1006_length_53691_cov_87.435465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7567567567567568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_1006_length_53691_cov_87.435465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0320321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538375" accession="ERS11140344">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140344</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538375</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_virus.1607</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.1989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_1607_length_35393_cov_26.274635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_1607_length_35393_cov_26.274635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-127;s__CAG-127 sp900319515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0352626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538376" accession="ERS11140345">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140345</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538376</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_virus.2077</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pbunavirus virus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17325395693569484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>185.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_2077_length_27742_cov_106.553985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.36585365853658536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_2077_length_27742_cov_106.553985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0277945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Pbunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538377" accession="ERS11140346">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140346</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538377</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738535_virus.9057</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738535.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.2298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738535) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_9057_length_6237_cov_36.506981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_5_THA1056YZ__NODE_9057_length_6237_cov_36.506981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738535_virus.9057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538378" accession="ERS11140347">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140347</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538378</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_provirus.188</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2095814977973569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_188_length_131773_cov_9.263167_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_188_length_131773_cov_9.263167_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538379" accession="ERS11140348">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140348</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538379</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_provirus.385</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_385_length_93487_cov_8.367091_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_385_length_93487_cov_8.367091_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.1152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538380" accession="ERS11140349">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140349</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538380</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_provirus.761</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.2002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_761_length_63903_cov_8.056356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738536_bin.114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_761_length_63903_cov_8.056356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_provirus.761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538381" accession="ERS11140350">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140350</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538381</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.12639</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>132.682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_12639_length_8654_cov_79.137461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_12639_length_8654_cov_79.137461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.12639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538382" accession="ERS11140351">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140351</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538382</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.1530</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_1530_length_42556_cov_10.084865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7073170731707317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_1530_length_42556_cov_10.084865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745823_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__SFJ001;s__SFJ001 sp004555865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.1530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538383" accession="ERS11140352">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140352</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538383</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.1666</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.78916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_1666_length_40521_cov_4.910790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_1666_length_40521_cov_4.910790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538384" accession="ERS11140353">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140353</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538384</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.207</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>313.974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_207_length_127831_cov_189.135166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_207_length_127831_cov_189.135166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738204_virus.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538385" accession="ERS11140354">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140354</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538385</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.302</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>542.677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_302_length_105790_cov_298.920540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9801980198019802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_302_length_105790_cov_298.920540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538386" accession="ERS11140355">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140355</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538386</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.465</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.5083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_465_length_85546_cov_18.675578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_465_length_85546_cov_18.675578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538387" accession="ERS11140356">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140356</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538387</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.65</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_65_length_200591_cov_39.827583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_65_length_200591_cov_39.827583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538388" accession="ERS11140357">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140357</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538388</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738536_virus.859</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738536.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738536) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_859_length_59906_cov_6.093266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9574468085106383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_15_1738__NODE_859_length_59906_cov_6.093266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_virus.859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538389" accession="ERS11140358">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140358</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538389</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_provirus.11</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacillus virus 125 provirus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:05Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_11_length_407929_cov_11.321112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738537_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_11_length_407929_cov_11.321112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Elmenteitavirus; Bacillus virus 125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538390" accession="ERS11140359">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140359</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538390</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_provirus.186</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_186_length_147341_cov_12.610509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738537_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_186_length_147341_cov_12.610509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538391" accession="ERS11140360">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140360</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538391</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_provirus.378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_378_length_109472_cov_14.108149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738537_bin.114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_378_length_109472_cov_14.108149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0354042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538392" accession="ERS11140361">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140361</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538392</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_provirus.765</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_765_length_74244_cov_8.812787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738537_bin.133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_765_length_74244_cov_8.812787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME006546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;s__CAG-605 sp000433255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538393" accession="ERS11140362">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140362</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538393</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.11501</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>200.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_11501_length_10722_cov_119.101861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_11501_length_10722_cov_119.101861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.12584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538394" accession="ERS11140363">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140363</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538394</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.1670</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_1670_length_46996_cov_9.889171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_1670_length_46996_cov_9.889171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538395" accession="ERS11140364">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140364</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538395</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.1997</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.02169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_1997_length_41510_cov_5.000121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_1997_length_41510_cov_5.000121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538396" accession="ERS11140365">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140365</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538396</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.2336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_2336_length_37345_cov_6.979151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_2336_length_37345_cov_6.979151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.1832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538397" accession="ERS11140366">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140366</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538397</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.2613</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_2613_length_34450_cov_6.205277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_2613_length_34450_cov_6.205277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738537_virus.2613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538398" accession="ERS11140367">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140367</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538398</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.3351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_3351_length_28612_cov_7.044437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_3351_length_28612_cov_7.044437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.3815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538399" accession="ERS11140368">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140368</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538399</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.4012</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_4012_length_24840_cov_6.938134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_4012_length_24840_cov_6.938134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738145_provirus.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538400" accession="ERS11140369">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140369</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538400</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_455_length_99305_cov_11.283378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9032258064516128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_455_length_99305_cov_11.283378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538401" accession="ERS11140370">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140370</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538401</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.2914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_545_length_90932_cov_20.621507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_545_length_90932_cov_20.621507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538402" accession="ERS11140371">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140371</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538402</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.7333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03729583153820042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_7333_length_15381_cov_7.649765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_7333_length_15381_cov_7.649765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.10576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538403" accession="ERS11140372">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140372</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538403</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738537_virus.977</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738537.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12578965839962564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738537) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_977_length_63895_cov_13.404823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_6_2213__NODE_977_length_63895_cov_13.404823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.1218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538404" accession="ERS11140373">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140373</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538404</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_provirus.204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04493392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>104.355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_204_length_114886_cov_67.193704_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738544_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_204_length_114886_cov_67.193704_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_provirus.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538405" accession="ERS11140374">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140374</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538405</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_provirus.335</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Psavirus provirus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.4072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_335_length_89948_cov_45.781743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738544_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_335_length_89948_cov_45.781743_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME261291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;s__NK3B98 sp900545815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0194573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Psavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538406" accession="ERS11140375">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140375</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538406</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.1083</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1083_length_45487_cov_14.042348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1083_length_45487_cov_14.042348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0316936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538407" accession="ERS11140376">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140376</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538407</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.1275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1275_length_41237_cov_25.864942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1275_length_41237_cov_25.864942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.1275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538408" accession="ERS11140377">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140377</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538408</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.1449</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1449_length_38203_cov_10.183182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1449_length_38203_cov_10.183182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.1449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538409" accession="ERS11140378">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140378</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538409</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.1676</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.0202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1676_length_34288_cov_43.281225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_1676_length_34288_cov_43.281225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.1676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538410" accession="ERS11140379">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140379</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538410</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.2203</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Uwajimavirus virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16349074572837724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_2203_length_28636_cov_6.921811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_2203_length_28636_cov_6.921811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.2203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Uwajimavirus; unclassified Uwajimavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538411" accession="ERS11140380">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140380</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538411</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.3130</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lactobacillus phage JNU_P7 virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.61906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_3130_length_22319_cov_4.972215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_3130_length_22319_cov_4.972215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus paragasseri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.3130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lactobacillus phage JNU_P7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538412" accession="ERS11140381">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140381</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538412</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.4516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>169.587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_4516_length_17014_cov_97.736671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_4516_length_17014_cov_97.736671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.4516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538413" accession="ERS11140382">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140382</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538413</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738544_virus.678</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>117.952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560311) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_678_length_61032_cov_66.650808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9655172413793104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_1_1742__NODE_678_length_61032_cov_66.650808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_virus.678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538414" accession="ERS11140383">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140383</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538414</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_provirus.204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.8792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_204_length_102130_cov_33.497036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4482758620689655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_204_length_102130_cov_33.497036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_provirus.688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538415" accession="ERS11140384">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140384</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538415</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_provirus.650</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.96609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_650_length_53408_cov_4.869607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7575757575757576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_650_length_53408_cov_4.869607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0339015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538416" accession="ERS11140385">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140385</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538416</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.1055</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1055_length_39144_cov_6.816828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7741935483870968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1055_length_39144_cov_6.816828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_virus.1055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538417" accession="ERS11140386">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140386</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538417</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.1272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1272_length_33989_cov_6.580886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1272_length_33989_cov_6.580886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_virus.1272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538418" accession="ERS11140387">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140387</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538418</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.1699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>121.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1699_length_27338_cov_68.365504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_1699_length_27338_cov_68.365504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0299046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538419" accession="ERS11140388">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140388</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538419</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.294</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>476.283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_294_length_84465_cov_264.370456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3888888888888889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_294_length_84465_cov_264.370456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538420" accession="ERS11140389">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140389</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538420</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.472</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.1276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_472_length_64237_cov_25.182700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_472_length_64237_cov_25.182700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0358019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538421" accession="ERS11140390">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140390</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538421</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.747</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>116.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_747_length_49139_cov_64.623476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_747_length_49139_cov_64.623476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_virus.747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538422" accession="ERS11140391">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140391</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538422</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738545_virus.911</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.41964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_911_length_42710_cov_5.097389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_21_CHE1011TZ__NODE_911_length_42710_cov_5.097389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738545_virus.911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538423" accession="ERS11140392">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140392</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538423</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_provirus.138</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_138_length_193508_cov_9.691942_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_138_length_193508_cov_9.691942_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides cellulosilyticus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_provirus.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538424" accession="ERS11140393">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140393</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538424</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_provirus.1848</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>116.479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1848_length_42788_cov_73.855026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1848_length_42788_cov_73.855026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_provirus.1848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538425" accession="ERS11140394">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140394</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538425</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_provirus.415</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_415_length_112319_cov_9.724453_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738546_bin.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9722222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_415_length_112319_cov_9.724453_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_provirus.415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538426" accession="ERS11140395">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140395</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538426</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_provirus.681</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10236784140969166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_681_length_84342_cov_5.797057_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738546_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_681_length_84342_cov_5.797057_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_G;s__Eubacterium_G ventriosum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0261300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538427" accession="ERS11140396">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140396</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538427</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_provirus.940</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3252.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_940_length_68410_cov_1838.652159_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_940_length_68410_cov_1838.652159_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538428" accession="ERS11140397">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140397</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538428</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.121</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_121_length_204103_cov_45.262354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_121_length_204103_cov_45.262354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538429" accession="ERS11140398">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140398</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538429</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.1474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1474_length_50658_cov_9.382495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9295774647887324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1474_length_50658_cov_9.382495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0371591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538430" accession="ERS11140399">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140399</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538430</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.1749</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.05957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1749_length_44716_cov_4.893501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_1749_length_44716_cov_4.893501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_virus.1749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538431" accession="ERS11140400">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140400</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538431</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.2127</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_2127_length_38397_cov_12.177427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6774193548387096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_2127_length_38397_cov_12.177427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738662_provirus.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538432" accession="ERS11140401">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140401</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538432</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.2707</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12241189427312778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_2707_length_30867_cov_7.855732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8787878787878788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_2707_length_30867_cov_7.855732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538433" accession="ERS11140402">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140402</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538433</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.4162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05236784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.01884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_4162_length_20528_cov_4.301061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_4162_length_20528_cov_4.301061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0342086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538434" accession="ERS11140403">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140403</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538434</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.584</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17233342861947878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>146.508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_584_length_92079_cov_80.226104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.986842105263158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_584_length_92079_cov_80.226104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0362043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538435" accession="ERS11140404">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140404</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538435</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738546_virus.888</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738546.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.6684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738546) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_888_length_70890_cov_48.830624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9791666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_4_7003__NODE_888_length_70890_cov_48.830624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0374335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538436" accession="ERS11140405">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140405</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538436</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_provirus.1291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Winogradskyella phage Peternella_1 provirus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1130538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>153.513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1291_length_53524_cov_94.982487_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5172413793103449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1291_length_53524_cov_94.982487_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0363290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Winogradskyella phage Peternella_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538437" accession="ERS11140406">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140406</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538437</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_provirus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_177_length_165483_cov_17.319420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_177_length_165483_cov_17.319420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_provirus.177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538438" accession="ERS11140407">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140407</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538438</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_provirus.291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_291_length_130090_cov_12.658765_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738547_bin.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_291_length_130090_cov_12.658765_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Marinifilaceae;g__Odoribacter;s__Odoribacter splanchnicus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_provirus.291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538439" accession="ERS11140408">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140408</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538439</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_provirus.681</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus provirus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05830901147062301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_681_length_83757_cov_42.879649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_681_length_83757_cov_42.879649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME058212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp900539375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_provirus.681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538440" accession="ERS11140409">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140409</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538440</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_virus.1197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>246.668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1197_length_56827_cov_141.017427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.59375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1197_length_56827_cov_141.017427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738514_provirus.899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538441" accession="ERS11140410">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140410</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538441</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_virus.1453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1453_length_48933_cov_18.370129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1453_length_48933_cov_18.370129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538442" accession="ERS11140411">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140411</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538442</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_virus.1826</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.9184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1826_length_41105_cov_24.972238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_1826_length_41105_cov_24.972238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0285998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538443" accession="ERS11140412">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140412</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538443</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_virus.3234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.17049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_3234_length_24192_cov_5.186481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_3234_length_24192_cov_5.186481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738665_bin.322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UMGS1623;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_virus.3234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538444" accession="ERS11140413">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140413</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538444</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738547_virus.5919</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738547.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738547) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_5919_length_12334_cov_10.268230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_11_1024__NODE_5919_length_12334_cov_10.268230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME104932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_I;s__Eubacterium_I ramulus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738547_virus.5919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538445" accession="ERS11140414">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140414</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538445</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738548_provirus.448</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738548.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.66751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738548) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567341) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_448_length_47763_cov_5.788722_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_448_length_47763_cov_5.788722_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738548_provirus.448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538446" accession="ERS11140415">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140415</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538446</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738548_virus.234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738548.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>388.331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738548) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567341) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_234_length_68576_cov_236.183302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4230769230769231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_234_length_68576_cov_236.183302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738548_virus.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538447" accession="ERS11140416">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140416</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538447</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738548_virus.804</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738548.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.8343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738548) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567341) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_804_length_31948_cov_50.287722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_17_RAU1026YZ__NODE_804_length_31948_cov_50.287722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Mitsuokella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738548_virus.804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538448" accession="ERS11140417">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140417</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538448</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_provirus.33</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_33_length_266230_cov_6.806115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738549_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_33_length_266230_cov_6.806115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_provirus.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538449" accession="ERS11140418">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140418</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538449</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_provirus.699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2298.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_699_length_67491_cov_1332.446969_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_699_length_67491_cov_1332.446969_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0308466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538450" accession="ERS11140419">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140419</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538450</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_virus.1198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.5781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_1198_length_45294_cov_23.878475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_1198_length_45294_cov_23.878475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738549_virus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538451" accession="ERS11140420">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140420</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538451</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_virus.1476</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Winogradskyella phage Peternella_1 virus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>245.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_1476_length_38070_cov_140.274314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_1476_length_38070_cov_140.274314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745596_virus.306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Winogradskyella phage Peternella_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538452" accession="ERS11140421">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140421</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538452</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_virus.2018</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_2018_length_28947_cov_13.875996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_2018_length_28947_cov_13.875996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0278013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538453" accession="ERS11140422">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140422</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538453</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_virus.3077</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.80592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_3077_length_19724_cov_3.658828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_3077_length_19724_cov_3.658828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides thetaiotaomicron</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0252096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538454" accession="ERS11140423">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140423</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538454</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738549_virus.6530</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738549.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.98737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738549) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558860) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_6530_length_9256_cov_3.896394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_N_24_7019__NODE_6530_length_9256_cov_3.896394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738549_virus.6530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538455" accession="ERS11140424">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140424</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538455</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_provirus.100</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_100_length_225077_cov_18.575009_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738550_bin.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_100_length_225077_cov_18.575009_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538456" accession="ERS11140425">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140425</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538456</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_provirus.2062</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>459.461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2062_length_43942_cov_269.247669_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2062_length_43942_cov_269.247669_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.1626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538457" accession="ERS11140426">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140426</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538457</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_provirus.2772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2772_length_35189_cov_7.520734_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2772_length_35189_cov_7.520734_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_provirus.2772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538458" accession="ERS11140427">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140427</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538458</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_provirus.443</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_443_length_120084_cov_12.054563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_443_length_120084_cov_12.054563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_provirus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538459" accession="ERS11140428">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140428</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538459</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_provirus.775</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.1177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_775_length_87042_cov_18.294199_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738550_bin.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_775_length_87042_cov_18.294199_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_provirus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538460" accession="ERS11140429">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140429</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538460</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.1189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.6441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_1189_length_64557_cov_51.531715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_1189_length_64557_cov_51.531715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.1189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538461" accession="ERS11140430">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140430</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538461</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.15544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.41548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_15544_length_8018_cov_4.182471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_15544_length_8018_cov_4.182471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0091843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538462" accession="ERS11140431">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140431</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538462</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.2100</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2100_length_43284_cov_6.033999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738550_bin.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2100_length_43284_cov_6.033999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-245;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.2100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538463" accession="ERS11140432">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140432</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538463</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.2385</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2385_length_39154_cov_14.206515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2385_length_39154_cov_14.206515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.2385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538464" accession="ERS11140433">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140433</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538464</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.2693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13986784140969166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2693_length_35944_cov_9.893746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_2693_length_35944_cov_9.893746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.2693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538465" accession="ERS11140434">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140434</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538465</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.3375</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>106.579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_3375_length_30075_cov_62.594340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_3375_length_30075_cov_62.594340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.2447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538466" accession="ERS11140435">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140435</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538466</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.4078</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.41739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_4078_length_25630_cov_5.559386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_4078_length_25630_cov_5.559386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.4078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538467" accession="ERS11140436">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140436</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538467</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738550_virus.6033</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacillus virus 250 virus assembled from ERR7738550.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0490368859781216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.12224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738550) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560050) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_6033_length_18262_cov_3.275282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_9_1942__NODE_6033_length_18262_cov_3.275282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__HGM11416;s__HGM11416 sp900768525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.2426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cecivirus; Bacillus virus 250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538468" accession="ERS11140437">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140437</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538468</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_provirus.137</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_137_length_125099_cov_11.800355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.851063829787234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_137_length_125099_cov_11.800355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738551_provirus.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538469" accession="ERS11140438">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140438</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538469</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_provirus.39</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_39_length_212087_cov_9.568303_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738551_bin.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_39_length_212087_cov_9.568303_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738551_provirus.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538470" accession="ERS11140439">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140439</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538470</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_virus.1056</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Teseptimavirus virus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00270137614315428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>820.947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_1056_length_38916_cov_457.136435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9607843137254902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_1056_length_38916_cov_457.136435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738551_virus.1056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Studiervirinae; Teseptimavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538471" accession="ERS11140440">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140440</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538471</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_virus.1684</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8061536050499776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>994.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_1684_length_27097_cov_554.496040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_1684_length_27097_cov_554.496040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_virus.1079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538472" accession="ERS11140441">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140441</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538472</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_virus.333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_333_length_81901_cov_12.308870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7659574468085106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_333_length_81901_cov_12.308870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738653_provirus.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538473" accession="ERS11140442">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140442</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538473</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738551_virus.814</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738551.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738551) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567508) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_814_length_46199_cov_8.075170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_7_THA1071YZ__NODE_814_length_46199_cov_8.075170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738548_provirus.10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538474" accession="ERS11140443">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140443</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538474</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_provirus.1227</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1227_length_64074_cov_18.807819_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1227_length_64074_cov_18.807819_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738273_bin.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_F;s__Ruminococcus_F champanellensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.1227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538475" accession="ERS11140444">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140444</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538475</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_provirus.2364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_2364_length_41785_cov_17.169848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_2364_length_41785_cov_17.169848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.2651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538476" accession="ERS11140445">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140445</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538476</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_provirus.338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02102144645966927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_338_length_131333_cov_5.757565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738552_bin.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_338_length_131333_cov_5.757565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_provirus.1139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538477" accession="ERS11140446">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140446</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538477</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_provirus.495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02188930888561058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_495_length_107606_cov_14.041561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738552_bin.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_495_length_107606_cov_14.041561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538478" accession="ERS11140447">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140447</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538478</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_provirus.638</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.0645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_638_length_92727_cov_23.137410_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5172413793103449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_638_length_92727_cov_23.137410_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538479" accession="ERS11140448">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140448</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538479</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.10660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09694350320639454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.4925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_10660_length_13000_cov_28.290051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_10660_length_13000_cov_28.290051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3248703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538480" accession="ERS11140449">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140449</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538480</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.1564</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1564_length_55085_cov_7.988947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1564_length_55085_cov_7.988947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.1564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538481" accession="ERS11140450">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140450</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538481</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.1956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1956_length_47854_cov_8.489043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_1956_length_47854_cov_8.489043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__W3P20-009;g__W3P20-009;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.1956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538482" accession="ERS11140451">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140451</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538482</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.2401</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_2401_length_41429_cov_48.348157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_2401_length_41429_cov_48.348157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538483" accession="ERS11140452">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140452</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538483</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_272_length_145730_cov_9.488215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6949152542372882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_272_length_145730_cov_9.488215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538484" accession="ERS11140453">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140453</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538484</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.3010</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_3010_length_35344_cov_9.746874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738552_bin.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_3010_length_35344_cov_9.746874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.3010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538485" accession="ERS11140454">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140454</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538485</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.3578</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00495594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_3578_length_30999_cov_12.904631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_3578_length_30999_cov_12.904631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__CAG-238;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.2180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538486" accession="ERS11140455">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140455</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538486</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.501</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.7958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_501_length_106706_cov_31.921682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9666666666666668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_501_length_106706_cov_31.921682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_virus.150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538487" accession="ERS11140456">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140456</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538487</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.5627</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05484581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.46971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_5627_length_21757_cov_4.348847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_5627_length_21757_cov_4.348847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.2315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538488" accession="ERS11140457">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140457</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538488</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738552_virus.781</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738552.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.7764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738552) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_781_length_83271_cov_32.004399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_4_2225__NODE_781_length_83271_cov_32.004399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738268_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Thermoplasmatota;c__Thermoplasmata;o__Methanomassiliicoccales;f__Methanomethylophilaceae;g__UBA71;s__UBA71 sp002504495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538489" accession="ERS11140458">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140458</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538489</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_provirus.136</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11920980428964167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>335.976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_136_length_159414_cov_187.430986_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738553_bin.121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_136_length_159414_cov_187.430986_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538490" accession="ERS11140459">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140459</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538490</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_provirus.24461</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. provirus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3.46397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_24461_length_3730_cov_1.687106_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_24461_length_3730_cov_1.687106_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME079077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000434935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_provirus.24461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538491" accession="ERS11140460">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140460</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538491</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_provirus.412</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_412_length_92449_cov_7.059888_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_412_length_92449_cov_7.059888_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_provirus.412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538492" accession="ERS11140461">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140461</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538492</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_provirus.542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06622982580247963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_542_length_80248_cov_10.722917_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738553_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3928571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_542_length_80248_cov_10.722917_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_provirus.542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538493" accession="ERS11140462">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140462</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538493</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_provirus.812</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>212.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_812_length_63061_cov_123.865283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738553_bin.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_812_length_63061_cov_123.865283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0352664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538494" accession="ERS11140463">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140463</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538494</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.1081</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1081_length_52980_cov_6.188307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1081_length_52980_cov_6.188307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.1081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538495" accession="ERS11140464">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140464</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538495</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.1245</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1245_length_48793_cov_6.962127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1245_length_48793_cov_6.962127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737972_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__RUG115;s__RUG115 sp900066395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.1245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538496" accession="ERS11140465">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140465</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538496</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.14365</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.9717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_14365_length_6131_cov_20.677569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_14365_length_6131_cov_20.677569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.14365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538497" accession="ERS11140466">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140466</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538497</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.15590</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Microviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.6178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_15590_length_5671_cov_20.659814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_15590_length_5671_cov_20.659814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME232472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RFN20;f__CAG-288;g__CAG-568;s__CAG-568 sp000434395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.17697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538498" accession="ERS11140467">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140467</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538498</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.1662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1662_length_39919_cov_10.271698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1662_length_39919_cov_10.271698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538499" accession="ERS11140468">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140468</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538499</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.1791</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03243392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>151.481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1791_length_37919_cov_85.677184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1791_length_37919_cov_85.677184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538500" accession="ERS11140469">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140469</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538500</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.1931</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1931_length_35812_cov_13.119519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_1931_length_35812_cov_13.119519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.1347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538501" accession="ERS11140470">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140470</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538501</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.2038</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.1968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2038_length_34571_cov_22.685974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7575757575757576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2038_length_34571_cov_22.685974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME255646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900552475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.2038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538502" accession="ERS11140471">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140471</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538502</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.2173</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cecivirus virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.8934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2173_length_32967_cov_16.476680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2173_length_32967_cov_16.476680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738273_bin.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_F;s__Ruminococcus_F champanellensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.2173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cecivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538503" accession="ERS11140472">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140472</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538503</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.2445</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2445_length_30123_cov_39.250516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2445_length_30123_cov_39.250516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538504" accession="ERS11140473">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140473</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538504</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.2653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2653_length_28136_cov_12.718094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_2653_length_28136_cov_12.718094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.2653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538505" accession="ERS11140474">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140474</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538505</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.8607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_336_length_102363_cov_20.705678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5853658536585366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_336_length_102363_cov_20.705678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538506" accession="ERS11140475">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140475</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538506</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.3691</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>112.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_3691_length_21258_cov_63.131155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_3691_length_21258_cov_63.131155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_provirus.1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538507" accession="ERS11140476">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140476</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538507</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.4549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_4549_length_17601_cov_7.120749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_4549_length_17601_cov_7.120749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.4549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538508" accession="ERS11140477">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140477</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538508</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.61</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.0661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_61_length_238374_cov_12.978367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8412698412698413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_61_length_238374_cov_12.978367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538509" accession="ERS11140478">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140478</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538509</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>171.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_849_length_61614_cov_96.326324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_849_length_61614_cov_96.326324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538510" accession="ERS11140479">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140479</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538510</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738553_virus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738553.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.0626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738553) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567226) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_99_length_187437_cov_24.798644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5316455696202531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_13_CHE1006Z__NODE_99_length_187437_cov_24.798644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538511" accession="ERS11140480">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140480</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538511</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_provirus.339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_339_length_76533_cov_9.479505_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_339_length_76533_cov_9.479505_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_provirus.339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538512" accession="ERS11140481">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140481</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538512</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_provirus.772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_772_length_46353_cov_6.428581_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_772_length_46353_cov_6.428581_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_provirus.772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538513" accession="ERS11140482">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140482</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538513</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_virus.1193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.43181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_1193_length_33771_cov_5.230961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_1193_length_33771_cov_5.230961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.1193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538514" accession="ERS11140483">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140483</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538514</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_virus.2153</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2544333617490467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.7082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_2153_length_21227_cov_15.587234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_2153_length_21227_cov_15.587234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.2153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538515" accession="ERS11140484">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140484</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538515</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_virus.535</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_535_length_59552_cov_8.396789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_535_length_59552_cov_8.396789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538516" accession="ERS11140485">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140485</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538516</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738554_virus.967</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738554.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738554) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567414) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_967_length_39127_cov_8.964430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.53125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_9_THA0062YZ__NODE_967_length_39127_cov_8.964430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538517" accession="ERS11140486">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140486</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538517</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_provirus.175</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.852477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>289.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_175_length_124394_cov_167.965113_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738555_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_175_length_124394_cov_167.965113_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_provirus.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538518" accession="ERS11140487">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140487</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538518</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_provirus.393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gokushovirus WZ-2015a provirus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.6568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_393_length_84369_cov_31.195511_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_393_length_84369_cov_31.195511_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.5992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; unclassified Gokushovirinae; Gokushovirus WZ-2015a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538519" accession="ERS11140488">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140488</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538519</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_virus.1070</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_1070_length_43175_cov_44.700172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_1070_length_43175_cov_44.700172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_virus.421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538520" accession="ERS11140489">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140489</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538520</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_virus.2076</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.04552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_2076_length_24778_cov_3.700538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_2076_length_24778_cov_3.700538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492 sp000434335;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_virus.485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538521" accession="ERS11140490">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140490</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538521</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_virus.3536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Picovirinae virus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_3536_length_15071_cov_10.973389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_3536_length_15071_cov_10.973389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900552665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_virus.2096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538522" accession="ERS11140491">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140491</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538522</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_virus.653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.9801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_653_length_61832_cov_27.487653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6888888888888889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_653_length_61832_cov_27.487653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_virus.653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538523" accession="ERS11140492">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140492</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538523</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738555_virus.984</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738555.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.48941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738555) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567620) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_984_length_45975_cov_4.744586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_24_2346__NODE_984_length_45975_cov_4.744586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_virus.984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538524" accession="ERS11140493">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140493</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538524</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_provirus.163</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_163_length_161575_cov_11.108905_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738556_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8205128205128205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_163_length_161575_cov_11.108905_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900549125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.2635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538525" accession="ERS11140494">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140494</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538525</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_provirus.286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cellulophaga phage Ingeline_8 provirus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.0428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_286_length_129291_cov_43.593697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738556_bin.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_286_length_129291_cov_43.593697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cellulophaga phage Ingeline_8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538526" accession="ERS11140495">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140495</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538526</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_provirus.601</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.32291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_601_length_84712_cov_5.173226_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738556_bin.303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_601_length_84712_cov_5.173226_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.1513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538527" accession="ERS11140496">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140496</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538527</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_119_length_186900_cov_16.453381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7183098591549296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_119_length_186900_cov_16.453381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538528" accession="ERS11140497">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140497</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538528</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.1406</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.8832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1406_length_49144_cov_13.090529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8780487804878049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1406_length_49144_cov_13.090529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738273_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__WG-1;s__WG-1 sp900539665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.1406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538529" accession="ERS11140498">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140498</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538529</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.1579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1579_length_45579_cov_8.851875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5576923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1579_length_45579_cov_8.851875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538530" accession="ERS11140499">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140499</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538530</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.1681</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1681_length_43729_cov_10.992532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6764705882352942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1681_length_43729_cov_10.992532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.1681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538531" accession="ERS11140500">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140500</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538531</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.1873</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1873_length_40551_cov_6.864679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_1873_length_40551_cov_6.864679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.1873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538532" accession="ERS11140501">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140501</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538532</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.2118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2118_length_37099_cov_8.689509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2118_length_37099_cov_8.689509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.2118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538533" accession="ERS11140502">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140502</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538533</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.2466</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03945538818076477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.7425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2466_length_33223_cov_19.315815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2466_length_33223_cov_19.315815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.1150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538534" accession="ERS11140503">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140503</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538534</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.2773</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2773_length_30500_cov_7.821714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_2773_length_30500_cov_7.821714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538535" accession="ERS11140504">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140504</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538535</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.315</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>386.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_315_length_121941_cov_217.359934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.943661971830986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_315_length_121941_cov_217.359934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738531_virus.150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538536" accession="ERS11140505">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140505</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538536</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.3822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_3822_length_23822_cov_6.814108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_3822_length_23822_cov_6.814108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538537" accession="ERS11140506">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140506</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538537</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.459</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_459_length_97580_cov_11.001579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2692307692307692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_459_length_97580_cov_11.001579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola sp000434735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538538" accession="ERS11140507">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140507</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538538</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.5578</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.4909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_5578_length_17645_cov_45.777892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_5578_length_17645_cov_45.777892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.7124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538539" accession="ERS11140508">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140508</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538539</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.7067</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.7998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_7067_length_14532_cov_24.126116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_7067_length_14532_cov_24.126116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.7067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538540" accession="ERS11140509">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140509</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538540</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738556_virus.9931</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560131) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_9931_length_11157_cov_18.286101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_3_2141__NODE_9931_length_11157_cov_18.286101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738513_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900551275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0055675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538541" accession="ERS11140510">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140510</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538541</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_provirus.1277</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1277_length_58633_cov_8.375640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1277_length_58633_cov_8.375640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_provirus.1277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538542" accession="ERS11140511">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140511</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538542</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_provirus.2387</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2387_length_39914_cov_10.660115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2387_length_39914_cov_10.660115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-448;s__CAG-448 sp003150135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_provirus.2387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538544" accession="ERS11140513">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140513</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538544</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.1131</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05422945491458051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.81792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1131_length_62865_cov_5.193126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1131_length_62865_cov_5.193126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538545" accession="ERS11140514">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140514</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538545</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.1352</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1352_length_56811_cov_10.323563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9696969696969696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1352_length_56811_cov_10.323563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538546" accession="ERS11140515">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140515</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538546</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.1547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.7359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1547_length_53013_cov_20.186659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1547_length_53013_cov_20.186659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME155972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Pasteurellaceae;g__Haemophilus_D;s__Haemophilus_D parainfluenzae_K</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.1545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538547" accession="ERS11140516">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140516</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538547</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.1819</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1819_length_48024_cov_6.372328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_1819_length_48024_cov_6.372328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME285832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002300055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538548" accession="ERS11140517">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140517</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538548</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.2074</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.0656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2074_length_44224_cov_39.810248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2074_length_44224_cov_39.810248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.2074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538549" accession="ERS11140518">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140518</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538549</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.2268</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2268_length_41288_cov_9.141055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2268_length_41288_cov_9.141055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_virus.1484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538550" accession="ERS11140519">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140519</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538550</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.2467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12230176211453749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2467_length_38998_cov_7.286118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2467_length_38998_cov_7.286118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_virus.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538551" accession="ERS11140520">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140520</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538551</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.2598</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>143.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2598_length_37584_cov_84.829125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_2598_length_37584_cov_84.829125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME011266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;s__UMGS1696 sp900554225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.2598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538552" accession="ERS11140521">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140521</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538552</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.8612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_288_length_123276_cov_20.122947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5217391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_288_length_123276_cov_20.122947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538553" accession="ERS11140522">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140522</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538553</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.32684</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.25287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_32684_length_5374_cov_4.895412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_32684_length_5374_cov_4.895412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME040022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA1409;s__UBA1409 sp002338885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.32684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538554" accession="ERS11140523">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140523</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538554</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.3827</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_3827_length_29062_cov_7.421080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_3827_length_29062_cov_7.421080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538555" accession="ERS11140524">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140524</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538555</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.431</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_431_length_101980_cov_8.276420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5822784810126582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_431_length_101980_cov_8.276420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0370433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538556" accession="ERS11140525">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140525</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538556</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.4664</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13997797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.9663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_4664_length_25383_cov_13.297044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_4664_length_25383_cov_13.297044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.4664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538557" accession="ERS11140526">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140526</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538557</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.6136</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.29262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_6136_length_20603_cov_4.803274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_6136_length_20603_cov_4.803274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFTH01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_provirus.3166142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538558" accession="ERS11140527">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140527</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538558</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_virus.8768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.59923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_8768_length_15785_cov_5.042271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738557_bin.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_8768_length_15785_cov_5.042271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738584_bin.295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-460;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0129178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538559" accession="ERS11140528">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140528</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538559</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_provirus.1336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.84947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1336_length_47482_cov_5.608396_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738558_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5862068965517241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1336_length_47482_cov_5.608396_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_provirus.1336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538560" accession="ERS11140529">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140529</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538560</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_provirus.392</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.8017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_392_length_91868_cov_14.892430_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6060606060606061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_392_length_91868_cov_14.892430_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_provirus.392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538561" accession="ERS11140530">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140530</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538561</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_provirus.856</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_856_length_62062_cov_8.772542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_856_length_62062_cov_8.772542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538562" accession="ERS11140531">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140531</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538562</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.1285</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>186.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1285_length_48724_cov_107.346065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1285_length_48724_cov_107.346065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538563" accession="ERS11140532">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140532</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538563</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.1480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vectrevirus virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03964755271026948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.32471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1480_length_44217_cov_5.173108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1480_length_44217_cov_5.173108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.1480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Molineuxvirinae; Vectrevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538564" accession="ERS11140533">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140533</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538564</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.1689</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1689_length_40662_cov_10.553604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_1689_length_40662_cov_10.553604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.1689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538565" accession="ERS11140534">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140534</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538565</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.191</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03727973568281938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_191_length_124683_cov_13.341147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8043478260869565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_191_length_124683_cov_13.341147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538566" accession="ERS11140535">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140535</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538566</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.2051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6129947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.5638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_2051_length_35424_cov_39.039551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_2051_length_35424_cov_39.039551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538567" accession="ERS11140536">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140536</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538567</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.2654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.29000921161364956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.4344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_2654_length_29342_cov_5.313617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738558_bin.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8809523809523809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_2654_length_29342_cov_5.313617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0228353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538568" accession="ERS11140537">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140537</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538568</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.3474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Yongloolinvirus virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_3474_length_23421_cov_14.892606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_3474_length_23421_cov_14.892606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0289566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Yongloolinvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538569" accession="ERS11140538">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140538</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538569</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.4240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_4240_length_19890_cov_9.115040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_4240_length_19890_cov_9.115040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.4240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538570" accession="ERS11140539">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140539</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538570</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738558_virus.839</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738558.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.9243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738558) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567246) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_839_length_62992_cov_28.187745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_13_CHE0014TZ__NODE_839_length_62992_cov_28.187745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Megasphaeraceae;g__Megasphaera;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538571" accession="ERS11140540">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140540</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538571</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_provirus.1156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.6275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1156_length_52605_cov_19.367975_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738559_bin.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1156_length_52605_cov_19.367975_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_provirus.1156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538572" accession="ERS11140541">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140541</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538572</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_provirus.1484</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1484_length_44674_cov_8.964168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9032258064516128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1484_length_44674_cov_8.964168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.2710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538573" accession="ERS11140542">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140542</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538573</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_provirus.1766</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1766_length_40357_cov_15.442602_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1766_length_40357_cov_15.442602_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_provirus.1766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538574" accession="ERS11140543">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140543</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538574</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_provirus.3375</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11996874984972046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.1137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_3375_length_25726_cov_22.219814_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_3375_length_25726_cov_22.219814_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME104932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_I;s__Eubacterium_I ramulus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.2244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538575" accession="ERS11140544">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140544</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538575</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_provirus.563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_563_length_77539_cov_8.758423_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_563_length_77539_cov_8.758423_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0375088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538576" accession="ERS11140545">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140545</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538576</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.1051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1051_length_55831_cov_8.741543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1051_length_55831_cov_8.741543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738159_bin.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA1381;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538577" accession="ERS11140546">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140546</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538577</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.1510</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1510_length_44136_cov_7.040219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1510_length_44136_cov_7.040219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.1510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538578" accession="ERS11140547">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140547</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538578</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.1811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1811_length_39699_cov_12.397254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_1811_length_39699_cov_12.397254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.1811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538579" accession="ERS11140548">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140548</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538579</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.2145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.25873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_2145_length_35735_cov_4.813029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_2145_length_35735_cov_4.813029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.2145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538580" accession="ERS11140549">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140549</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538580</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.2804</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_2804_length_29627_cov_17.852386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_2804_length_29627_cov_17.852386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_provirus.1734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538581" accession="ERS11140550">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140550</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538581</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.3346</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11545151506649157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_3346_length_25961_cov_5.857016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_3346_length_25961_cov_5.857016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME216999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA1381;g__CAG-41;s__CAG-41 sp900066215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.3346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538582" accession="ERS11140551">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140551</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538582</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.4395</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>103.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_4395_length_20795_cov_63.404817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_4395_length_20795_cov_63.404817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.4395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538583" accession="ERS11140552">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140552</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538583</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.5016</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_5016_length_18660_cov_11.818490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_5016_length_18660_cov_11.818490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.4536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538584" accession="ERS11140553">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140553</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538584</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.7734</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_7734_length_13249_cov_15.160112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_7734_length_13249_cov_15.160112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.10211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538585" accession="ERS11140554">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140554</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538585</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738559_virus.950</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567799) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_950_length_59280_cov_8.880361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_8_1528__NODE_950_length_59280_cov_8.880361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538586" accession="ERS11140555">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140555</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538586</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_provirus.2532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.877477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.19167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_2532_length_26534_cov_3.997203_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_2532_length_26534_cov_3.997203_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538587" accession="ERS11140556">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140556</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538587</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_provirus.69</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01554291393558823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.8896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_69_length_233941_cov_10.983760_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738560_bin.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5277777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_69_length_233941_cov_10.983760_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_provirus.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538588" accession="ERS11140557">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140557</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538588</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.1059</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1059_length_51736_cov_20.718055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1059_length_51736_cov_20.718055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.1059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538589" accession="ERS11140558">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140558</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538589</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.1359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1359_length_42887_cov_48.104952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1359_length_42887_cov_48.104952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.1359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538590" accession="ERS11140559">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140559</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538590</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.1498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1498_length_39594_cov_8.988790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1498_length_39594_cov_8.988790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.1498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538591" accession="ERS11140560">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140560</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538591</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.1600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.95811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1600_length_37625_cov_4.486284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1600_length_37625_cov_4.486284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538592" accession="ERS11140561">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140561</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538592</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.1749</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1749_length_35323_cov_11.608750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_1749_length_35323_cov_11.608750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-272;s__CAG-272 sp000433515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.1896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538593" accession="ERS11140562">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140562</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538593</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.2216</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01445538818076477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.04106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_2216_length_29401_cov_4.476163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_2216_length_29401_cov_4.476163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746757_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;s__UBA1436 sp900541355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738271_provirus.247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538594" accession="ERS11140563">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140563</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538594</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.3323</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_3323_length_21477_cov_5.180981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_3323_length_21477_cov_5.180981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.3323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538595" accession="ERS11140564">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140564</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538595</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.426</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.2706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_426_length_93931_cov_27.872685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.543859649122807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_426_length_93931_cov_27.872685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538596" accession="ERS11140565">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140565</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538596</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.697</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.2161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_697_length_68793_cov_39.774128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_697_length_68793_cov_39.774128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538597" accession="ERS11140566">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140566</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538597</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738560_virus.988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738560.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.2694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738560) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567601) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_988_length_54475_cov_36.689069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_9_THA1076JZ__NODE_988_length_54475_cov_36.689069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.1040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538598" accession="ERS11140567">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140567</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538598</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_provirus.1611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1611_length_33737_cov_9.810131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738561_bin.248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1611_length_33737_cov_9.810131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746785_bin.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_I;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.1004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538599" accession="ERS11140568">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140568</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538599</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_provirus.433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_433_length_74447_cov_9.940715_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738561_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_433_length_74447_cov_9.940715_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_provirus.433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538600" accession="ERS11140569">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140569</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538600</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_provirus.774</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.7037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_774_length_53272_cov_16.516421_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_774_length_53272_cov_16.516421_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_provirus.1174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538601" accession="ERS11140570">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140570</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538601</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.3716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_102_length_152677_cov_31.784168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6981132075471698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_102_length_152677_cov_31.784168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_E</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538602" accession="ERS11140571">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140571</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538602</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.1225</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.54912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1225_length_39974_cov_5.360403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1225_length_39974_cov_5.360403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_virus.1225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538603" accession="ERS11140572">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140572</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538603</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.1400</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1400_length_36960_cov_12.586747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1400_length_36960_cov_12.586747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538604" accession="ERS11140573">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140573</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538604</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.1596</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5895374449339206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.0376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1596_length_33855_cov_27.343271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_1596_length_33855_cov_27.343271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538605" accession="ERS11140574">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140574</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538605</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.2070</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17779457421297337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.4954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_2070_length_28804_cov_15.409963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8055555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_2070_length_28804_cov_15.409963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_provirus.787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538606" accession="ERS11140575">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140575</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538606</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.2603</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_2603_length_24718_cov_7.199424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_2603_length_24718_cov_7.199424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_virus.4711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538607" accession="ERS11140576">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140576</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538607</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_306_length_88880_cov_6.428477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6739130434782609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_306_length_88880_cov_6.428477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745854_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__HGM13006;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738228_virus.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538608" accession="ERS11140577">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140577</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538608</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.3850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_3850_length_19052_cov_6.052964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_3850_length_19052_cov_6.052964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.8397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538609" accession="ERS11140578">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140578</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538609</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.4485</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17514496130346496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_4485_length_17171_cov_8.495905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_4485_length_17171_cov_8.495905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_virus.4485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538610" accession="ERS11140579">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140579</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538610</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.629</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_629_length_60127_cov_8.192939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_629_length_60127_cov_8.192939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538611" accession="ERS11140580">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140580</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538611</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.732</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_732_length_55297_cov_8.175317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_732_length_55297_cov_8.175317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737953_virus.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538612" accession="ERS11140581">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140581</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538612</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738561_virus.9640</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.76285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567869) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_9640_length_9798_cov_3.270034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_16_1564__NODE_9640_length_9798_cov_3.270034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_virus.4712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538613" accession="ERS11140582">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140582</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538613</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_provirus.1791</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1791_length_35634_cov_12.355992_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738562_bin.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1791_length_35634_cov_12.355992_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0251367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538614" accession="ERS11140583">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140583</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538614</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_provirus.278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_278_length_125362_cov_6.853598_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738562_bin.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_278_length_125362_cov_6.853598_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0327705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538615" accession="ERS11140584">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140584</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538615</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_provirus.903</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_903_length_64989_cov_12.691074_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738562_bin.100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_903_length_64989_cov_12.691074_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738562_provirus.903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538616" accession="ERS11140585">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140585</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538616</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_virus.1327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>789.882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1327_length_47118_cov_461.921792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1327_length_47118_cov_461.921792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0377017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538617" accession="ERS11140586">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140586</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538617</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_virus.1845</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01542166484257643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1845_length_34522_cov_9.385281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_1845_length_34522_cov_9.385281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Bilophila;s__Bilophila wadsworthia</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738562_virus.1845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538618" accession="ERS11140587">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140587</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538618</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738562_virus.2728</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738562.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1885875257548235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.45285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738562) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_2728_length_23164_cov_4.115346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738562_bin.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_G_23_1002__NODE_2728_length_23164_cov_4.115346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides fragilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738937_provirus.68_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538619" accession="ERS11140588">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140588</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538619</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_provirus.11</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>234.687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_11_length_383712_cov_133.232213_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738563_bin.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_11_length_383712_cov_133.232213_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_provirus.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538620" accession="ERS11140589">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140589</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538620</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_provirus.1906</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1906_length_34522_cov_11.178168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1906_length_34522_cov_11.178168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_provirus.1906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538621" accession="ERS11140590">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140590</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538621</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_provirus.550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_550_length_74665_cov_13.422467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738563_bin.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_550_length_74665_cov_13.422467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_provirus.550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538622" accession="ERS11140591">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140591</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538622</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.1066</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.4705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1066_length_50295_cov_35.974909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1066_length_50295_cov_35.974909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538623" accession="ERS11140592">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140592</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538623</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.12663</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.9432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_12663_length_8198_cov_28.967738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_12663_length_8198_cov_28.967738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.12663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538624" accession="ERS11140593">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140593</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538624</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.1467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.4992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1467_length_41242_cov_15.315559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1467_length_41242_cov_15.315559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.1467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538625" accession="ERS11140594">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140594</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538625</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.1594</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.8552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1594_length_38762_cov_41.073930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1594_length_38762_cov_41.073930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.1594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538626" accession="ERS11140595">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140595</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538626</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.1730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1730_length_36742_cov_9.696277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_1730_length_36742_cov_9.696277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3571503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538627" accession="ERS11140596">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140596</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538627</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.2008</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_2008_length_33177_cov_6.811450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_2008_length_33177_cov_6.811450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.2008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538628" accession="ERS11140597">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140597</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538628</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.2346</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_2346_length_29948_cov_8.821365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_2346_length_29948_cov_8.821365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.2346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538629" accession="ERS11140598">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140598</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538629</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.9962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_286_length_106819_cov_16.498642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_286_length_106819_cov_16.498642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538630" accession="ERS11140599">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140599</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538630</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>247.659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_324_length_100135_cov_140.692478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_324_length_100135_cov_140.692478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538631" accession="ERS11140600">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140600</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538631</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.3964</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_3964_length_20505_cov_14.076757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_3964_length_20505_cov_14.076757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.3964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538632" accession="ERS11140601">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140601</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538632</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.5120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>92.1263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_5120_length_16755_cov_53.547368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_5120_length_16755_cov_53.547368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.5120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538633" accession="ERS11140602">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140602</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538633</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.65</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.26998632496778685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.8569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_65_length_188306_cov_17.959682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_65_length_188306_cov_17.959682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538634" accession="ERS11140603">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140603</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538634</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.771</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.3606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_771_length_61345_cov_54.181188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9142857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_771_length_61345_cov_54.181188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738536_provirus.683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538635" accession="ERS11140604">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140604</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538635</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738563_virus.922</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>247.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560756) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_922_length_55580_cov_138.828334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_1_1731__NODE_922_length_55580_cov_138.828334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_virus.922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538636" accession="ERS11140605">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140605</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538636</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738564_provirus.25</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738564.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738564) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567396) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_25_length_280696_cov_10.979659_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738564_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_25_length_280696_cov_10.979659_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738564_provirus.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538637" accession="ERS11140606">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140606</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538637</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738564_provirus.580</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738564.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>218.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738564) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567396) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_580_length_46053_cov_127.457608_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_580_length_46053_cov_127.457608_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538638" accession="ERS11140607">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140607</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538638</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738564_virus.150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738564.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>131.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738564) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567396) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_150_length_126186_cov_76.725135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_3_THA0078JZ__NODE_150_length_126186_cov_76.725135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738564_virus.150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538639" accession="ERS11140608">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140608</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538639</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_provirus.1147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.96124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1147_length_53661_cov_5.252034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1147_length_53661_cov_5.252034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.2186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538640" accession="ERS11140609">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140609</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538640</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_provirus.243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.5479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_243_length_128332_cov_31.715824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738565_bin.244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9117647058823528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_243_length_128332_cov_31.715824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.2420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538641" accession="ERS11140610">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140610</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538641</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_provirus.553</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_553_length_83843_cov_14.822637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_553_length_83843_cov_14.822637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738531_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_B;c__Peptococcia;o__Peptococcales;f__Peptococcaceae;g__UMGS1590;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_provirus.931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538642" accession="ERS11140611">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140611</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538642</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_provirus.93</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.5293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_93_length_190118_cov_16.501355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738565_bin.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_93_length_190118_cov_16.501355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.2062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538643" accession="ERS11140612">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140612</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538643</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.1449</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vegasvirus virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1449_length_45993_cov_11.296346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1449_length_45993_cov_11.296346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.1449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Gochnauervirinae; Vegasvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538644" accession="ERS11140613">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140613</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538644</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.1739</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1739_length_41278_cov_11.333536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_1739_length_41278_cov_11.333536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.1739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538645" accession="ERS11140614">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140614</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538645</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.2068</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18491189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.83415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_2068_length_36815_cov_5.660025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738565_bin.257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9666666666666668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_2068_length_36815_cov_5.660025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_virus.1263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538646" accession="ERS11140615">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140615</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538646</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.2306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_2306_length_34238_cov_10.340213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_2306_length_34238_cov_10.340213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.2306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538647" accession="ERS11140616">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140616</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538647</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.3021</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>492.588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_3021_length_28410_cov_298.815268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_3021_length_28410_cov_298.815268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746440_virus.1641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538648" accession="ERS11140617">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140617</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538648</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.3879</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.3311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_3879_length_24054_cov_26.643492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_3879_length_24054_cov_26.643492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.2314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538650" accession="ERS11140619">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140619</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538650</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.6235</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.85478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_6235_length_17317_cov_5.437877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_6235_length_17317_cov_5.437877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.6263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538651" accession="ERS11140620">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140620</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538651</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.9052</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.02077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_9052_length_13197_cov_3.861890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_9052_length_13197_cov_3.861890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P perfringens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.9052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538652" accession="ERS11140621">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140621</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538652</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_provirus.1691</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.9989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1691_length_41006_cov_11.866305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1691_length_41006_cov_11.866305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.1419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538653" accession="ERS11140622">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140622</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538653</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_provirus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.9307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_275_length_111215_cov_39.308418_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_275_length_111215_cov_39.308418_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538654" accession="ERS11140623">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140623</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538654</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_provirus.7</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_7_length_422537_cov_8.095491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738566_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_7_length_422537_cov_8.095491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_provirus.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538655" accession="ERS11140624">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140624</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538655</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_provirus.962</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.9675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_962_length_58891_cov_24.167375_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_962_length_58891_cov_24.167375_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.1765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538656" accession="ERS11140625">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140625</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538656</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.1286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.6545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1286_length_48769_cov_19.119773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738566_bin.260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1286_length_48769_cov_19.119773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.1286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538658" accession="ERS11140627">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140627</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538658</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.16057</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_16057_length_6690_cov_11.712990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_16057_length_6690_cov_11.712990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.16057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538659" accession="ERS11140628">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140628</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538659</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.1781</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.0454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1781_length_39478_cov_18.570899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1781_length_39478_cov_18.570899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.1781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538660" accession="ERS11140629">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140629</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538660</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.2096</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.5124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_2096_length_35351_cov_23.947724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738566_bin.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_2096_length_35351_cov_23.947724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0336364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538661" accession="ERS11140630">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140630</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538661</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.2997</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.3137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_2997_length_27116_cov_17.803358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_2997_length_27116_cov_17.803358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538662" accession="ERS11140631">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140631</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538662</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.3400</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>150.433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_3400_length_24475_cov_82.934216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_3400_length_24475_cov_82.934216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME023798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900543155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538664" accession="ERS11140633">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140633</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538664</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.7250</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_7250_length_13056_cov_6.927892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_7250_length_13056_cov_6.927892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.7250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538665" accession="ERS11140634">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140634</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538665</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.933</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.9975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_933_length_60004_cov_15.117276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.40816326530612246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_933_length_60004_cov_15.117276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538666" accession="ERS11140635">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140635</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538666</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_provirus.1629</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08134553233439058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1629_length_48601_cov_7.400235_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738567_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1629_length_48601_cov_7.400235_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_provirus.2113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538667" accession="ERS11140636">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140636</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538667</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_provirus.259</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.1242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_259_length_138001_cov_18.425705_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738567_bin.318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_259_length_138001_cov_18.425705_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_provirus.259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538669" accession="ERS11140637">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140637</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538669</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_provirus.923</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>123.291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_923_length_69203_cov_71.831033_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_923_length_69203_cov_71.831033_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_provirus.923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538670" accession="ERS11140638">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140638</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538670</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.1339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1339_length_54863_cov_11.482696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1339_length_54863_cov_11.482696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.1001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538671" accession="ERS11140639">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140639</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538671</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.1644</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1644_length_48354_cov_12.916813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9696969696969696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1644_length_48354_cov_12.916813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538672" accession="ERS11140640">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140640</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538672</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.1989</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>335.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1989_length_42583_cov_189.416412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9310344827586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_1989_length_42583_cov_189.416412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900547315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.1989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538674" accession="ERS11140642">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140642</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538674</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.2617</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1682162390555499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_2617_length_35490_cov_11.811764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738567_bin.291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_2617_length_35490_cov_11.811764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738257_provirus.416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538675" accession="ERS11140643">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140643</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538675</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.3065</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_3065_length_31521_cov_7.410444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_3065_length_31521_cov_7.410444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.2306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538676" accession="ERS11140644">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140644</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538676</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.3454</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_3454_length_28641_cov_6.744819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_3454_length_28641_cov_6.744819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.3454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538677" accession="ERS11140645">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140645</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538677</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.4494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_4494_length_23170_cov_6.997012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_4494_length_23170_cov_6.997012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538678" accession="ERS11140646">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140646</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538678</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.5327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16913360473183253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_5327_length_20055_cov_10.034938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738567_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_5327_length_20055_cov_10.034938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.5327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538680" accession="ERS11140648">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140648</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538680</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_provirus.140</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_140_length_173287_cov_6.905427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738568_bin.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_140_length_173287_cov_6.905427_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738537_provirus.867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538681" accession="ERS11140649">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140649</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538681</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_provirus.2513</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2513_length_36415_cov_10.838158_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2513_length_36415_cov_10.838158_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538682" accession="ERS11140650">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140650</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538682</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_provirus.614</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_614_length_86453_cov_19.956261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738568_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5277777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_614_length_86453_cov_19.956261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__SFEL01;s__SFEL01 sp004557245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_provirus.614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538683" accession="ERS11140651">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140651</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538683</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.1142</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1142_length_60584_cov_11.899929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1142_length_60584_cov_11.899929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538684" accession="ERS11140652">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140652</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538684</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.1331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1331_length_55331_cov_25.507601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1331_length_55331_cov_25.507601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538685" accession="ERS11140653">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140653</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538685</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.1699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1699_length_47494_cov_8.518548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8387096774193549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_1699_length_47494_cov_8.518548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.1699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538687" accession="ERS11140655">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140655</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538687</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.2243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2243_length_39252_cov_5.850645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2243_length_39252_cov_5.850645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__Phil1;s__Phil1 sp001940855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.2243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538688" accession="ERS11140656">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140656</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538688</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.2416</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2416_length_37520_cov_12.074406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2416_length_37520_cov_12.074406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738241_virus.1296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538689" accession="ERS11140657">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140657</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538689</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.2760</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2760_length_34182_cov_8.616215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2760_length_34182_cov_8.616215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.2315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538690" accession="ERS11140658">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140658</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538690</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.3455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08458149779735687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_3455_length_28959_cov_6.195070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_3455_length_28959_cov_6.195070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_virus.1405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538691" accession="ERS11140659">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140659</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538691</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.5340</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.36559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_5340_length_20525_cov_3.508852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_5340_length_20525_cov_3.508852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.1610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538693" accession="ERS11140661">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140661</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538693</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_provirus.174</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.0328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_174_length_110213_cov_20.776976_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738569_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_174_length_110213_cov_20.776976_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.2010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538694" accession="ERS11140662">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140662</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538694</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_provirus.537</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.0095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_537_length_67752_cov_16.991090_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_537_length_67752_cov_16.991090_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538695" accession="ERS11140663">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140663</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538695</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_provirus.935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>400.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_935_length_49953_cov_251.107827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_935_length_49953_cov_251.107827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538696" accession="ERS11140664">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140664</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538696</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.1342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_1342_length_41028_cov_16.542819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_1342_length_41028_cov_16.542819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_provirus.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538697" accession="ERS11140665">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140665</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538697</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.1563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.2577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_1563_length_37773_cov_22.701772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738569_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_1563_length_37773_cov_22.701772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.1326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538698" accession="ERS11140666">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140666</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538698</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_193_length_106570_cov_10.271849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_193_length_106570_cov_10.271849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_virus.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538699" accession="ERS11140667">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140667</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538699</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.228</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.5066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_228_length_98051_cov_19.737216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.922077922077922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_228_length_98051_cov_19.737216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538700" accession="ERS11140668">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140668</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538700</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.27133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.74486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_27133_length_5253_cov_4.670015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_27133_length_5253_cov_4.670015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger sp004555405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_virus.17872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538702" accession="ERS11140670">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140670</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538702</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.4072</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_4072_length_21018_cov_6.878898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_4072_length_21018_cov_6.878898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745854_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_provirus.219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538703" accession="ERS11140671">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140671</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538703</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.620</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.8132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_620_length_61822_cov_14.701450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_620_length_61822_cov_14.701450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_virus.620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538704" accession="ERS11140672">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140672</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538704</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_850_length_52883_cov_13.903893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_850_length_52883_cov_13.903893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0328330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538705" accession="ERS11140673">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140673</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538705</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_provirus.120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.4158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_120_length_198478_cov_40.655072_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5151515151515151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_120_length_198478_cov_40.655072_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538706" accession="ERS11140674">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140674</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538706</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_provirus.192</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_192_length_162322_cov_7.447385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_192_length_162322_cov_7.447385_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538708" accession="ERS11140676">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140676</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538708</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_provirus.512</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.889977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.3573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_512_length_100202_cov_17.159660_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_512_length_100202_cov_17.159660_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538709" accession="ERS11140677">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140677</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538709</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_provirus.9</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.1499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_9_length_458744_cov_22.631277_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5588235294117647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_9_length_458744_cov_22.631277_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538710" accession="ERS11140678">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140678</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538710</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.11112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11996874984972046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.4646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_11112_length_11900_cov_29.818236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_11112_length_11900_cov_29.818236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.11112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538711" accession="ERS11140679">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140679</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538711</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.1411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.8982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_1411_length_54617_cov_25.954474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9577464788732394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_1411_length_54617_cov_25.954474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538712" accession="ERS11140680">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140680</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538712</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.1926</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_1926_length_44271_cov_10.824931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7714285714285715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_1926_length_44271_cov_10.824931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538713" accession="ERS11140681">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140681</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538713</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.2296</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_2296_length_39061_cov_6.826031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_2296_length_39061_cov_6.826031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538714" accession="ERS11140682">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140682</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538714</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.2680</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pseudomonas phage PPAT virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_2680_length_35160_cov_12.142890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_2680_length_35160_cov_12.142890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.2680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Pseudomonas phage PPAT</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538716" accession="ERS11140684">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140684</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538716</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.3844</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_3844_length_27448_cov_5.846918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_3844_length_27448_cov_5.846918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME138453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Lactococcus;s__Lactococcus lactis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.3844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538717" accession="ERS11140685">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140685</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538717</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.501</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>686.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_501_length_101384_cov_388.881242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_501_length_101384_cov_388.881242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538718" accession="ERS11140686">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140686</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538718</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.583</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_583_length_93202_cov_6.299329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_583_length_93202_cov_6.299329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738651_bin.237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA5905;s__UBA5905 sp900763035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538719" accession="ERS11140687">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140687</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538719</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.9327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_9327_length_13731_cov_15.800352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_9327_length_13731_cov_15.800352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.9327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538720" accession="ERS11140688">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140688</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538720</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_provirus.12</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>180.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_12_length_439879_cov_101.892911_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_12_length_439879_cov_101.892911_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-877;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538722" accession="ERS11140690">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140690</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538722</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_provirus.290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_290_length_146174_cov_19.915577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7317073170731707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_290_length_146174_cov_19.915577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0315825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538723" accession="ERS11140691">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140691</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538723</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_provirus.563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_563_length_104102_cov_7.036376_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_563_length_104102_cov_7.036376_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA737;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538724" accession="ERS11140692">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140692</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538724</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_provirus.958</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.32309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_958_length_75370_cov_5.128711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_958_length_75370_cov_5.128711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__UBA1234;g__UBA1234;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538725" accession="ERS11140693">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140693</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538725</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.1317</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.0352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1317_length_61627_cov_39.084484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4838709677419355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1317_length_61627_cov_39.084484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538726" accession="ERS11140694">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140694</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538726</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.1642</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1642_length_53493_cov_5.595365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1642_length_53493_cov_5.595365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538727" accession="ERS11140695">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140695</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538727</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.2030</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23805382343288484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2030_length_45690_cov_14.496525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2030_length_45690_cov_14.496525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.2030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538728" accession="ERS11140696">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140696</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538728</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.2393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2393_length_40713_cov_9.352077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2393_length_40713_cov_9.352077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.2393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538729" accession="ERS11140697">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140697</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538729</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.2713</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>722.488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2713_length_37223_cov_409.093684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_2713_length_37223_cov_409.093684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538731" accession="ERS11140699">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140699</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538731</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.3997</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0255538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>123.671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_3997_length_27828_cov_70.889806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_3997_length_27828_cov_70.889806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538732" accession="ERS11140700">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140700</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538732</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.5021</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02673512386358416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_5021_length_23200_cov_13.565973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_5021_length_23200_cov_13.565973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538733" accession="ERS11140701">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140701</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538733</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.597</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2047.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_597_length_100710_cov_1135.381773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9066666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_597_length_100710_cov_1135.381773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538734" accession="ERS11140702">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140702</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538734</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.8421</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacillus phage Thornton virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03243392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.46585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_8421_length_15305_cov_4.617153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_8421_length_15305_cov_4.617153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME237505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__HGM11412;s__HGM11412 sp900770185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.8421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Northropvirinae; Claudivirus; unclassified Claudivirus; Bacillus phage Thornton</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538735" accession="ERS11140703">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140703</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538735</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_provirus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_112_length_176715_cov_9.484267_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.979591836734694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_112_length_176715_cov_9.484267_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_provirus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538737" accession="ERS11140705">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140705</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538737</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_provirus.532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan284 provirus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_532_length_85151_cov_8.557080_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.559322033898305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_532_length_85151_cov_8.557080_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_provirus.513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538738" accession="ERS11140706">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140706</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538738</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_provirus.8</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn provirus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_8_length_472117_cov_13.594306_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_8_length_472117_cov_13.594306_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0319907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538739" accession="ERS11140707">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140707</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538739</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.11117</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.17703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_11117_length_8205_cov_3.444267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_11117_length_8205_cov_3.444267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__Dorea formicigenerans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.6102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538740" accession="ERS11140708">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140708</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538740</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.1276</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.73886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1276_length_46490_cov_5.566371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1276_length_46490_cov_5.566371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.1276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538741" accession="ERS11140709">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140709</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538741</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.1665</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Kayfunavirus virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1665_length_39255_cov_15.090076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6530612244897959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1665_length_39255_cov_15.090076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.1665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Studiervirinae; Kayfunavirus; unclassified Kayfunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538742" accession="ERS11140710">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140710</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538742</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.1804</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.87837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1804_length_36712_cov_4.954224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1804_length_36712_cov_4.954224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538744" accession="ERS11140712">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140712</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538744</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.2208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7349118942731276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_2208_length_31440_cov_7.474157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_2208_length_31440_cov_7.474157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-4420035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538745" accession="ERS11140713">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140713</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538745</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.302</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_302_length_113289_cov_6.990540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_302_length_113289_cov_6.990540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538746" accession="ERS11140714">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140714</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538746</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.4714</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_4714_length_16841_cov_8.072417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_4714_length_16841_cov_8.072417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0156279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538747" accession="ERS11140715">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140715</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538747</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.76</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>96.636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_76_length_214686_cov_56.466448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_76_length_214686_cov_56.466448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME150720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp003526955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538748" accession="ERS11140716">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140716</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538748</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_provirus.1246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.355272547781255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1246_length_49334_cov_29.774672_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1246_length_49334_cov_29.774672_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_provirus.1182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538750" accession="ERS11140718">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140718</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538750</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_provirus.298</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>162.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_298_length_107450_cov_96.466570_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738574_bin.50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_298_length_107450_cov_96.466570_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0296258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538751" accession="ERS11140719">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140719</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538751</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.1429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_60_length_204646_cov_53.559400_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738574_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_60_length_204646_cov_53.559400_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_provirus.694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538752" accession="ERS11140720">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140720</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538752</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.1075</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.1693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1075_length_54417_cov_33.276445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1075_length_54417_cov_33.276445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_provirus.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538753" accession="ERS11140721">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140721</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538753</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.1409</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>151.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1409_length_45950_cov_91.431343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1409_length_45950_cov_91.431343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538754" accession="ERS11140722">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140722</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538754</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.1693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1693_length_41629_cov_11.843064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42105263157894735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_1693_length_41629_cov_11.843064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.1341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538755" accession="ERS11140723">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140723</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538755</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.2099</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_2099_length_36602_cov_9.968296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8387096774193549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_2099_length_36602_cov_9.968296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.2099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538756" accession="ERS11140724">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140724</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538756</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.2428</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.45692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_2428_length_33114_cov_5.425614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738574_bin.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_2428_length_33114_cov_5.425614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.2428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538757" accession="ERS11140725">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140725</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538757</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.3339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_3339_length_26646_cov_7.821559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738574_bin.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_3339_length_26646_cov_7.821559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_provirus.265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538759" accession="ERS11140727">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140727</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538759</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.655</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.3409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_655_length_70997_cov_40.251452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_655_length_70997_cov_40.251452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738155_virus.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538760" accession="ERS11140728">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140728</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538760</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.9152</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.34084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_9152_length_12317_cov_4.673284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_9152_length_12317_cov_4.673284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.5896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538761" accession="ERS11140729">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140729</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538761</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_provirus.1536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25195538818076474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.8264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1536_length_42868_cov_4.744339_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1536_length_42868_cov_4.744339_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538762" accession="ERS11140730">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140730</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538762</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_provirus.5</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.0219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_5_length_743725_cov_46.283211_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738575_bin.139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5604395604395604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_5_length_743725_cov_46.283211_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0378236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538763" accession="ERS11140731">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140731</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538763</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_provirus.895</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.6849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_895_length_68621_cov_20.999095_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_895_length_68621_cov_20.999095_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_provirus.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538765" accession="ERS11140733">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140733</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538765</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.1391</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1391_length_47612_cov_12.856422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1391_length_47612_cov_12.856422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.1391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538766" accession="ERS11140734">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140734</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538766</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.1743</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00743392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.33827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1743_length_38309_cov_5.174487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1743_length_38309_cov_5.174487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0327529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538767" accession="ERS11140735">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140735</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538767</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.1849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.2517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1849_length_36335_cov_41.447432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_1849_length_36335_cov_41.447432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0281755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538768" accession="ERS11140736">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140736</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538768</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.2247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>413.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_2247_length_30195_cov_237.626668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_2247_length_30195_cov_237.626668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.1491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538769" accession="ERS11140737">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140737</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538769</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.3728</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>229.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_3728_length_17898_cov_129.664553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_3728_length_17898_cov_129.664553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.3728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538770" accession="ERS11140738">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140738</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538770</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.758</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.4061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_758_length_77878_cov_37.659696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_758_length_77878_cov_37.659696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538771" accession="ERS11140739">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140739</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538771</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_provirus.1488</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vegasvirus provirus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23306637662361993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.5811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1488_length_40094_cov_30.831796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738576_bin.220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1488_length_40094_cov_30.831796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_provirus.1488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Gochnauervirinae; Vegasvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538773" accession="ERS11140741">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140741</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538773</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_provirus.700</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_700_length_66255_cov_13.511046_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_700_length_66255_cov_13.511046_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538774" accession="ERS11140742">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140742</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538774</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.1308</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>188.447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1308_length_43785_cov_108.559737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1308_length_43785_cov_108.559737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538775" accession="ERS11140743">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140743</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538775</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.1442</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.31278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1442_length_40933_cov_4.083953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1442_length_40933_cov_4.083953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738472_bin.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900557405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538776" accession="ERS11140744">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140744</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538776</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.1552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.8386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1552_length_39006_cov_45.097870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1552_length_39006_cov_45.097870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538777" accession="ERS11140745">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140745</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538777</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.1716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1716_length_36208_cov_10.450278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_1716_length_36208_cov_10.450278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538779" accession="ERS11140747">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140747</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538779</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.3244</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_3244_length_22260_cov_11.126403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_3244_length_22260_cov_11.126403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.3244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538780" accession="ERS11140748">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140748</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538780</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.3136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_393_length_96541_cov_32.156877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_393_length_96541_cov_32.156877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538781" accession="ERS11140749">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140749</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538781</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.5104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_489_length_84873_cov_31.165527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_489_length_84873_cov_31.165527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538782" accession="ERS11140750">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140750</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538782</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.7324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0088092263283177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.4246200000000009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_7324_length_11513_cov_4.901102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_7324_length_11513_cov_4.901102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.7324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538783" accession="ERS11140751">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140751</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538783</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_932_length_55887_cov_11.789679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5609756097560976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_932_length_55887_cov_11.789679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538784" accession="ERS11140752">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140752</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538784</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_provirus.319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.877477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_319_length_78337_cov_14.816611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_319_length_78337_cov_14.816611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538785" accession="ERS11140753">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140753</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538785</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.1051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1051_length_41896_cov_15.027547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1051_length_41896_cov_15.027547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_provirus.617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538787" accession="ERS11140755">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140755</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538787</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_145_length_113766_cov_13.420041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.926829268292683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_145_length_113766_cov_13.420041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_provirus.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538788" accession="ERS11140756">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140756</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538788</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.1820</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06384348183251046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1820_length_30467_cov_10.597203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1820_length_30467_cov_10.597203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538789" accession="ERS11140757">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140757</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538789</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.2390</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.06308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_2390_length_25959_cov_5.169500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_2390_length_25959_cov_5.169500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0298456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538790" accession="ERS11140758">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140758</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538790</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3148.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_306_length_80952_cov_1732.150776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5483870967741935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_306_length_80952_cov_1732.150776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538791" accession="ERS11140759">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140759</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538791</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.41</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22204294689013696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.4184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_41_length_189709_cov_48.572187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_41_length_189709_cov_48.572187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538793" accession="ERS11140761">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140761</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538793</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.832</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.877477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_832_length_47463_cov_16.652999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_832_length_47463_cov_16.652999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538794" accession="ERS11140762">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140762</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538794</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_provirus.116</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.4494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_116_length_213183_cov_53.163989_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_116_length_213183_cov_53.163989_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_provirus.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538795" accession="ERS11140763">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140763</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538795</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_provirus.2013</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2013_length_43459_cov_5.814577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738578_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2013_length_43459_cov_5.814577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0130534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538796" accession="ERS11140764">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140764</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538796</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_provirus.532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.4862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_532_length_107225_cov_40.245632_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738578_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8717948717948718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_532_length_107225_cov_40.245632_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_provirus.532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538797" accession="ERS11140765">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140765</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538797</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_provirus.897</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17230176211453746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_897_length_78000_cov_15.162827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738578_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_897_length_78000_cov_15.162827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides ovatus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_provirus.897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538798" accession="ERS11140766">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140766</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538798</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.1329</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>318.456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_1329_length_59222_cov_179.710508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_1329_length_59222_cov_179.710508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538799" accession="ERS11140767">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140767</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538799</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.1727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.34749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_1727_length_48572_cov_5.158140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_1727_length_48572_cov_5.158140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0368262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538800" accession="ERS11140768">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140768</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538800</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.2087</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>245.658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2087_length_42424_cov_141.181052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2087_length_42424_cov_141.181052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0308466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538802" accession="ERS11140770">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140770</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538802</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.2566</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07995594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.2716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2566_length_35224_cov_22.774518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2566_length_35224_cov_22.774518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738549_virus.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538803" accession="ERS11140771">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140771</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538803</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.2897</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06369765791341378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>173.213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2897_length_31329_cov_98.672981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2897_length_31329_cov_98.672981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538804" accession="ERS11140772">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140772</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538804</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.3966</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0490368859781216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_3966_length_23012_cov_12.678657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_3966_length_23012_cov_12.678657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_virus.3966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538805" accession="ERS11140773">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140773</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538805</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.6298</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3492241221920916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_6298_length_13802_cov_7.397231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_6298_length_13802_cov_7.397231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738578_virus.6298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538806" accession="ERS11140774">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140774</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538806</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_provirus.1328</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17360358568359646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.5269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_1328_length_46309_cov_18.481398_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_1328_length_46309_cov_18.481398_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538808" accession="ERS11140776">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140776</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538808</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_provirus.601</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>226.284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_601_length_73852_cov_158.731400_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738579_bin.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_601_length_73852_cov_158.731400_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_provirus.800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538809" accession="ERS11140777">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140777</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538809</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.1196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_1196_length_49481_cov_12.439985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_1196_length_49481_cov_12.439985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538810" accession="ERS11140778">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140778</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538810</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_144_length_150236_cov_26.043008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.734375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_144_length_150236_cov_26.043008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738579_virus.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538543" accession="ERS11140512">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140512</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538543</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738557_provirus.700</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738557.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738557) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567920) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_700_length_81334_cov_8.497373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_12_1556__NODE_700_length_81334_cov_8.497373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538649" accession="ERS11140618">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140618</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538649</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738565_virus.4922</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04343334073279703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567905) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_4922_length_20389_cov_7.422263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_14_1557__NODE_4922_length_20389_cov_7.422263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538657" accession="ERS11140626">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140626</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538657</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.1518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.0495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1518_length_43787_cov_11.171837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_1518_length_43787_cov_11.171837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.1518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538663" accession="ERS11140632">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140632</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538663</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738566_virus.440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738566.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>268.011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738566) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561745) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_440_length_89631_cov_150.267593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_H_9_1235__NODE_440_length_89631_cov_150.267593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538673" accession="ERS11140641">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140641</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538673</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.2239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_2239_length_39181_cov_13.442052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_2239_length_39181_cov_13.442052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538679" accession="ERS11140647">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140647</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538679</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738567_virus.9149</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738567.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.15601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738567) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561222) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_9149_length_12976_cov_2.778665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_14_1620__NODE_9149_length_12976_cov_2.778665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME017973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.3072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538686" accession="ERS11140654">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140654</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538686</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.2004</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.2841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2004_length_42341_cov_31.066794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_2004_length_42341_cov_31.066794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538692" accession="ERS11140660">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140660</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538692</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738568_virus.750</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738568.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.4953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738568) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567702) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_750_length_77673_cov_20.857132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5365853658536586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_16_2355__NODE_750_length_77673_cov_20.857132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538701" accession="ERS11140669">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140669</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538701</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738569_virus.3231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738569.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738569) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567755) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_3231_length_24333_cov_9.381473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_10_2277__NODE_3231_length_24333_cov_9.381473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_provirus.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538707" accession="ERS11140675">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140675</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538707</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_provirus.364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01110955218654152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_364_length_119275_cov_10.889931_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_364_length_119275_cov_10.889931_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_provirus.352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538715" accession="ERS11140683">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140683</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538715</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738570_virus.3146</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738570.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.9812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738570) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_3146_length_31501_cov_22.932854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_21_1974__NODE_3146_length_31501_cov_22.932854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0282086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538721" accession="ERS11140689">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140689</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538721</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_provirus.1778</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona provirus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1778_length_50361_cov_6.269827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738571_bin.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_1778_length_50361_cov_6.269827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538730" accession="ERS11140698">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140698</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538730</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738571_virus.319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738571.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738571) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560598) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_319_length_139155_cov_8.310617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7906976744186046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_2_1640__NODE_319_length_139155_cov_8.310617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538736" accession="ERS11140704">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140704</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538736</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_provirus.207</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00128438135477076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>927.605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_207_length_135363_cov_533.228649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738572_bin.274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_207_length_135363_cov_533.228649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538743" accession="ERS11140711">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140711</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538743</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738572_virus.1896</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738572.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738572) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567535) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1896_length_35323_cov_6.593713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_3_THA1060YZ__NODE_1896_length_35323_cov_6.593713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-272;s__CAG-272 sp000433515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.1896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538749" accession="ERS11140717">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140717</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538749</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_provirus.178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.3734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_178_length_131002_cov_17.813007_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_178_length_131002_cov_17.813007_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_provirus.800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538758" accession="ERS11140726">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140726</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538758</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738574_virus.4561</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738574.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.45037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738574) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567876) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_4561_length_21146_cov_3.502160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_22_1305__NODE_4561_length_21146_cov_3.502160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738614_provirus.423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538764" accession="ERS11140732">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140732</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538764</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738575_virus.124</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738575.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.9726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738575) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567530) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_124_length_229456_cov_21.976842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_7_THA1072JZ__NODE_124_length_229456_cov_21.976842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538772" accession="ERS11140740">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140740</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538772</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_provirus.31_2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>123.993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_31_length_310458_cov_76.108525_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_31_length_310458_cov_76.108525_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Mitsuokella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_provirus.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538778" accession="ERS11140746">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140746</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538778</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738576_virus.2208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738576.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15137530562347187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738576) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_2208_length_30018_cov_5.863331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_I_3_1743__NODE_2208_length_30018_cov_5.863331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.2208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538786" accession="ERS11140754">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140754</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538786</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.1152</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1152_length_39746_cov_10.141723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_1152_length_39746_cov_10.141723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538792" accession="ERS11140760">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140760</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538792</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738577_virus.635</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.8045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560750) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_635_length_55501_cov_38.784552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_23_1730__NODE_635_length_55501_cov_38.784552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538801" accession="ERS11140769">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140769</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538801</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738578_virus.2396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738578.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02549472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.4905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738578) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558882) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2396_length_37485_cov_21.828352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9807692307692308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_A_5_1019__NODE_2396_length_37485_cov_21.828352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0367225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538807" accession="ERS11140775">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140775</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538807</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_provirus.27_2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.5469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_27_length_281367_cov_37.685350_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738579_bin.196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_27_length_281367_cov_37.685350_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538811" accession="ERS11140779">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140779</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538811</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.19682</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.10007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_19682_length_4277_cov_3.741667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_19682_length_4277_cov_3.741667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738246_bin.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Ruminococcus_A;s__Ruminococcus_A faecicola</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738579_virus.19682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538812" accession="ERS11140780">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140780</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538812</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.312</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03799472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>316.097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_312_length_104065_cov_178.853666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9699248120300752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_312_length_104065_cov_178.853666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538813" accession="ERS11140781">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140781</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538813</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.407</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>439.022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_407_length_91712_cov_253.719900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738579_bin.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_407_length_91712_cov_253.719900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0361433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538814" accession="ERS11140782">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140782</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538814</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738579_virus.864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738579.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7379947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738579) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559355) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_864_length_60712_cov_8.939441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_6_2620__NODE_864_length_60712_cov_8.939441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_virus.633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538815" accession="ERS11140783">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140783</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538815</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_provirus.1361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11369765791341375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.7135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1361_length_51449_cov_4.736607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738580_bin.103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1361_length_51449_cov_4.736607_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738580_bin.103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.1361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538816" accession="ERS11140784">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140784</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538816</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_provirus.1942</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.26572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1942_length_40407_cov_5.075824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738580_bin.203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1942_length_40407_cov_5.075824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Lentisphaeria;o__Victivallales;f__UBA1829;g__UBA1829;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.2131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538817" accession="ERS11140785">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140785</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538817</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_provirus.366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.30247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>356.617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_366_length_109384_cov_201.050884_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738580_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_366_length_109384_cov_201.050884_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538818" accession="ERS11140786">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140786</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538818</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_provirus.8</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_8_length_550033_cov_10.106561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738580_bin.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.323943661971831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_8_length_550033_cov_10.106561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738186_bin.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp000431775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538819" accession="ERS11140787">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140787</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538819</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.1239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Enterobacteria phage mEp021 virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1239_length_54587_cov_6.556320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8641975308641975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1239_length_54587_cov_6.556320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.1239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Enterobacteria phage mEp021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538820" accession="ERS11140788">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140788</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538820</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.1498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:56Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1498_length_48194_cov_5.535383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1498_length_48194_cov_5.535383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538821" accession="ERS11140789">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140789</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538821</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.1732</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1732_length_43516_cov_9.376390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_1732_length_43516_cov_9.376390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745908_provirus.390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538822" accession="ERS11140790">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140790</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538822</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.88869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_190_length_152594_cov_5.471731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_190_length_152594_cov_5.471731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538823" accession="ERS11140791">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140791</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538823</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.2000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.49634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2000_length_39448_cov_3.996749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2000_length_39448_cov_3.996749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.1684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538824" accession="ERS11140792">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140792</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538824</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.2198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.5653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2198_length_36728_cov_23.301984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2198_length_36728_cov_23.301984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME011747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-382;g__UMGS882;s__UMGS882 sp900546385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538825" accession="ERS11140793">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140793</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538825</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.2334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2334_length_35123_cov_9.224277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2334_length_35123_cov_9.224277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538826" accession="ERS11140794">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140794</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538826</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.2720</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.78784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2720_length_31273_cov_4.845397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_2720_length_31273_cov_4.845397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538827" accession="ERS11140795">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140795</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538827</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.3389</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9049559471365638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.17317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_3389_length_25876_cov_3.162875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738580_bin.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_3389_length_25876_cov_3.162875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.3389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538828" accession="ERS11140796">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140796</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538828</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.3906</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.4056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_3906_length_22962_cov_31.769412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_3906_length_22962_cov_31.769412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.3906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538829" accession="ERS11140797">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140797</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538829</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.5092</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.6612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_5092_length_17951_cov_4.859013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_5092_length_17951_cov_4.859013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738575_virus.1911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538830" accession="ERS11140798">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140798</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538830</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738580_virus.681</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Enterococcus phage EF5 virus assembled from ERR7738580.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13997797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.54292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738580) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_681_length_79113_cov_4.695253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_17_CHE1007SZ__NODE_681_length_79113_cov_4.695253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0375669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Enterococcus phage EF5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538831" accession="ERS11140799">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140799</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538831</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_provirus.1025</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1025_length_72702_cov_7.848950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1025_length_72702_cov_7.848950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_provirus.1025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538832" accession="ERS11140800">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140800</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538832</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_provirus.1614</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>112.678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1614_length_53263_cov_63.469522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1614_length_53263_cov_63.469522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_provirus.881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538833" accession="ERS11140801">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140801</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538833</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_provirus.2563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>8.6317135549872e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.0985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2563_length_38019_cov_20.773839_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2563_length_38019_cov_20.773839_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__UBA1234;g__UBA1234;s__UBA1234 sp900753135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_provirus.2658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538834" accession="ERS11140802">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140802</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538834</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_provirus.876</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_876_length_80209_cov_9.875480_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738581_bin.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_876_length_80209_cov_9.875480_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_provirus.876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538835" accession="ERS11140803">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140803</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538835</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.109</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_109_length_214274_cov_6.762770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7424242424242424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_109_length_214274_cov_6.762770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538836" accession="ERS11140804">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140804</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538836</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.1357</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.1197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1357_length_59729_cov_41.349527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1357_length_59729_cov_41.349527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538837" accession="ERS11140805">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140805</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538837</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.1494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1494_length_56168_cov_17.076857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1494_length_56168_cov_17.076857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_virus.1542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538838" accession="ERS11140806">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140806</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538838</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.1793</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1193047942582454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.1657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1793_length_49562_cov_17.709508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7352941176470589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_1793_length_49562_cov_17.709508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538839" accession="ERS11140807">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140807</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538839</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.2162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.19772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2162_length_43232_cov_4.964199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2162_length_43232_cov_4.964199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME158868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900549635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538840" accession="ERS11140808">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140808</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538840</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.2422</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2422_length_39688_cov_8.173411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2422_length_39688_cov_8.173411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538841" accession="ERS11140809">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140809</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538841</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.2778</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2778_length_35581_cov_8.419220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_2778_length_35581_cov_8.419220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.1612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538842" accession="ERS11140810">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140810</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538842</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.3090</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.50253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_3090_length_32713_cov_5.128723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_3090_length_32713_cov_5.128723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME234270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UBA1740;s__UBA1740 sp900767415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.3090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538843" accession="ERS11140811">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140811</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538843</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.3788</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08436123348017628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_3788_length_27716_cov_9.150367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738581_bin.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_3788_length_27716_cov_9.150367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.1859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538844" accession="ERS11140812">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140812</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538844</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.4513</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22815620873411188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.96343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_4513_length_23861_cov_4.450555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_4513_length_23861_cov_4.450555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0335721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538845" accession="ERS11140813">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140813</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538845</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.1323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_549_length_104081_cov_104.659446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7936507936507936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_549_length_104081_cov_104.659446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538846" accession="ERS11140814">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140814</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538846</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.7989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_660_length_93738_cov_27.204098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_660_length_93738_cov_27.204098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538847" accession="ERS11140815">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140815</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538847</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.8127</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_8127_length_14464_cov_5.406548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_8127_length_14464_cov_5.406548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738662_bin.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RFN20;f__CAG-449;g__CAG-449;s__CAG-449 sp000432895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.8127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538848" accession="ERS11140816">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140816</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538848</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738581_virus.96</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738581.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.9086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738581) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_96_length_227439_cov_12.871359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_24_1168__NODE_96_length_227439_cov_12.871359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538849" accession="ERS11140817">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140817</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538849</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_provirus.240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.0081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_240_length_184187_cov_57.591103_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738582_bin.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_240_length_184187_cov_57.591103_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538850" accession="ERS11140818">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140818</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538850</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_provirus.446</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00134348183251044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>217.789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_446_length_137250_cov_128.923935_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_446_length_137250_cov_128.923935_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538851" accession="ERS11140819">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140819</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538851</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_provirus.858</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.0929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_858_length_89766_cov_21.900478_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738582_bin.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_858_length_89766_cov_21.900478_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738275_provirus.129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538852" accession="ERS11140820">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140820</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538852</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.1255</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>770.557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_1255_length_68355_cov_455.201529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_1255_length_68355_cov_455.201529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0344814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538853" accession="ERS11140821">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140821</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538853</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.1552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.4049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_1552_length_58511_cov_45.063798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_1552_length_58511_cov_45.063798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME011266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;s__UMGS1696 sp900554225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.1552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538854" accession="ERS11140822">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140822</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538854</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.2108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.0286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2108_length_45930_cov_21.412034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2108_length_45930_cov_21.412034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.2108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538855" accession="ERS11140823">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140823</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538855</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.2401</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2401_length_41256_cov_10.542170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5128205128205128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2401_length_41256_cov_10.542170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME248826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_D</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.2401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538856" accession="ERS11140824">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140824</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538856</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.2673</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.3464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2673_length_37456_cov_45.385805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2673_length_37456_cov_45.385805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0353882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538857" accession="ERS11140825">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140825</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538857</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.2901</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2901_length_34975_cov_11.536105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_2901_length_34975_cov_11.536105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-448;s__CAG-448 sp003150135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.2901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538858" accession="ERS11140826">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140826</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538858</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03050977056944868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>134.777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_351_length_155323_cov_80.269295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6792452830188679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_351_length_155323_cov_80.269295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538859" accession="ERS11140827">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140827</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538859</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.3830</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.83076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_3830_length_27650_cov_5.725238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_3830_length_27650_cov_5.725238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.3830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538860" accession="ERS11140828">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140828</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538860</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>146.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_474_length_132872_cov_86.870123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7317073170731707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_474_length_132872_cov_86.870123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538861" accession="ERS11140829">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140829</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538861</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738582_virus.7161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567635) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_7161_length_15870_cov_9.874565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_7_RAU0027AZ__NODE_7161_length_15870_cov_9.874565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__CAG-145;s__CAG-145 sp900754795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.7161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538862" accession="ERS11140830">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140830</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538862</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_provirus.1377</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3274779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1377_length_39744_cov_9.918345_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1377_length_39744_cov_9.918345_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538863" accession="ERS11140831">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140831</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538863</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_provirus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>111.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_53_length_254025_cov_63.282767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738583_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_53_length_254025_cov_63.282767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745908_provirus.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538864" accession="ERS11140832">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140832</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538864</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_virus.1128</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>177.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1128_length_46624_cov_119.787355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1128_length_46624_cov_119.787355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738583_virus.1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538865" accession="ERS11140833">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140833</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538865</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_virus.1411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.20342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1411_length_39114_cov_4.991700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_1411_length_39114_cov_4.991700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0300685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538866" accession="ERS11140834">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140834</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538866</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_virus.211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_211_length_126009_cov_9.939086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_211_length_126009_cov_9.939086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Ligilactobacillus;s__Ligilactobacillus ruminis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738583_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538867" accession="ERS11140835">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140835</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538867</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_virus.495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.1749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_495_length_80647_cov_26.931488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6458333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_495_length_80647_cov_26.931488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745908_virus.609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538868" accession="ERS11140836">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140836</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538868</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738583_virus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738583.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02241189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.4271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738583) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567560) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_99_length_189344_cov_46.732980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5147058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_15_THA1075YZ__NODE_99_length_189344_cov_46.732980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538869" accession="ERS11140837">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140837</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538869</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_provirus.1437</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>202.241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1437_length_57534_cov_117.791305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1437_length_57534_cov_117.791305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_provirus.1222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538870" accession="ERS11140838">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140838</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538870</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_provirus.197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>319.253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_197_length_155714_cov_195.240438_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738584_bin.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>46.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_197_length_155714_cov_195.240438_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_provirus.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538871" accession="ERS11140839">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140839</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538871</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_provirus.353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.0951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_353_length_121529_cov_22.832090_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738584_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_353_length_121529_cov_22.832090_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0226686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538872" accession="ERS11140840">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140840</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538872</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_provirus.515</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe provirus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.2955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_515_length_101886_cov_25.284120_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_515_length_101886_cov_25.284120_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538873" accession="ERS11140841">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140841</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538873</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_provirus.906</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_906_length_75063_cov_17.012322_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_906_length_75063_cov_17.012322_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538874" accession="ERS11140842">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140842</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538874</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.12662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.130873179909068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.94175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_12662_length_11755_cov_5.401610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_12662_length_11755_cov_5.401610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-882;s__CAG-882 sp000435595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.12662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538875" accession="ERS11140843">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140843</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538875</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.1526</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>677.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1526_length_55889_cov_387.198488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8787878787878788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1526_length_55889_cov_387.198488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538876" accession="ERS11140844">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140844</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538876</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.1720</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2138578943106438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1801.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1720_length_51630_cov_1757.820748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1720_length_51630_cov_1757.820748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.1720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538877" accession="ERS11140845">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140845</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538877</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.1964</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1964_length_47225_cov_10.916052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738584_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_1964_length_47225_cov_10.916052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.1796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538878" accession="ERS11140846">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140846</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538878</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.2187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2187_length_43948_cov_19.550705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2187_length_43948_cov_19.550705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.2187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538879" accession="ERS11140847">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140847</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538879</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.2341</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08491189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.8288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2341_length_42165_cov_38.164394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2341_length_42165_cov_38.164394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538880" accession="ERS11140848">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140848</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538880</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.2532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13180120315873836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.5239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2532_length_40335_cov_21.579612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2532_length_40335_cov_21.579612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538881" accession="ERS11140849">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140849</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538881</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.2792</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2792_length_37608_cov_7.061709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_2792_length_37608_cov_7.061709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.2385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538882" accession="ERS11140850">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140850</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538882</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.3016</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_3016_length_35688_cov_9.713207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_3016_length_35688_cov_9.713207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS1889;s__UMGS1889 sp900556055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.2463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538883" accession="ERS11140851">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140851</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538883</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.32</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.4152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_32_length_277933_cov_40.856336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8378378378378378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_32_length_277933_cov_40.856336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538884" accession="ERS11140852">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140852</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538884</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.3841</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_3841_length_30270_cov_5.922035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_3841_length_30270_cov_5.922035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.2220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538885" accession="ERS11140853">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140853</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538885</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.4339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>124.646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_4339_length_27808_cov_105.096174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_4339_length_27808_cov_105.096174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_provirus.883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538886" accession="ERS11140854">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140854</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538886</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.4817</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.6433700000000009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_4817_length_25728_cov_5.459085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_4817_length_25728_cov_5.459085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538887" accession="ERS11140855">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140855</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538887</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.55002</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Erwinia phage PEar6 virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.9202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_55002_length_3244_cov_40.552889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_55002_length_3244_cov_40.552889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.21413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; unclassified Inoviridae; Erwinia phage PEar6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538888" accession="ERS11140856">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140856</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538888</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.6062</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1884773935962332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.75407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_6062_length_21550_cov_4.402179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_6062_length_21550_cov_4.402179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.6062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538889" accession="ERS11140857">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140857</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538889</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.830</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.4063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_830_length_78860_cov_36.967785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_830_length_78860_cov_36.967785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538890" accession="ERS11140858">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140858</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538890</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738584_virus.93</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738584.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738584) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567998) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_93_length_197854_cov_61.148445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5303030303030303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_6_1188__NODE_93_length_197854_cov_61.148445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747083_bin.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1994;s__UMGS1994 sp900556975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538891" accession="ERS11140859">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140859</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538891</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_provirus.1311</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>140.049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1311_length_60739_cov_94.241041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1311_length_60739_cov_94.241041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538892" accession="ERS11140860">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140860</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538892</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_provirus.226</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.6992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_226_length_179825_cov_17.728870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738585_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_226_length_179825_cov_17.728870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;s__Ruminiclostridium_E siraeum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_provirus.226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538893" accession="ERS11140861">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140861</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538893</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_provirus.43</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.6354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_43_length_343766_cov_41.781558_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738585_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_43_length_343766_cov_41.781558_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0305973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538894" accession="ERS11140862">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140862</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538894</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_provirus.630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>240.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_630_length_102270_cov_157.948578_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_630_length_102270_cov_157.948578_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_provirus.630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538895" accession="ERS11140863">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140863</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538895</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_provirus.952</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.5126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_952_length_78680_cov_31.046818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738585_bin.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_952_length_78680_cov_31.046818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-877;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_provirus.952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538896" accession="ERS11140864">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140864</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538896</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.1518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>294.392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1518_length_54116_cov_181.128648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1518_length_54116_cov_181.128648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_virus.1518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538897" accession="ERS11140865">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140865</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538897</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.1915</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.7141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1915_length_44897_cov_38.209772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_1915_length_44897_cov_38.209772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538898" accession="ERS11140866">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140866</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538898</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.2187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_2187_length_40650_cov_8.481774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_2187_length_40650_cov_8.481774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_virus.2187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538899" accession="ERS11140867">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140867</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538899</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.2629</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_2629_length_35062_cov_22.850364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_2629_length_35062_cov_22.850364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA6857;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_virus.2629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538900" accession="ERS11140868">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140868</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538900</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.3309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.5338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_3309_length_28404_cov_31.614996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_3309_length_28404_cov_31.614996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_virus.3309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538901" accession="ERS11140869">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140869</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538901</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738585_virus.558</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738585.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738585) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567432) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_558_length_108983_cov_6.147347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_19_RAU1029Y__NODE_558_length_108983_cov_6.147347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002299635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738585_virus.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538902" accession="ERS11140870">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140870</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538902</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_provirus.1004</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.4206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1004_length_51380_cov_28.296045_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738586_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1004_length_51380_cov_28.296045_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_I;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.1004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538903" accession="ERS11140871">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140871</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538903</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_provirus.1516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.2914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1516_length_40247_cov_42.847100_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1516_length_40247_cov_42.847100_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.1516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538904" accession="ERS11140872">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140872</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538904</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_provirus.229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus provirus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_229_length_121569_cov_12.593356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7906976744186046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_229_length_121569_cov_12.593356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Enterobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538905" accession="ERS11140873">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140873</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538905</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_provirus.36_2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.6045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_36_length_264990_cov_15.854262_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_36_length_264990_cov_15.854262_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_virus.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538906" accession="ERS11140874">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140874</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538906</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_provirus.772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_772_length_59872_cov_5.289238_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738586_bin.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_772_length_59872_cov_5.289238_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738242_provirus.349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538907" accession="ERS11140875">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140875</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538907</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.1187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1187_length_46498_cov_7.013873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1187_length_46498_cov_7.013873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538908" accession="ERS11140876">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140876</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538908</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.1519</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1519_length_40195_cov_13.851164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1519_length_40195_cov_13.851164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738188_virus.899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538909" accession="ERS11140877">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140877</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538909</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.1778</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.05078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1778_length_36797_cov_5.073094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_1778_length_36797_cov_5.073094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538910" accession="ERS11140878">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140878</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538910</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.2276</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.40272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_2276_length_32095_cov_3.816385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_2276_length_32095_cov_3.816385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_provirus.1739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538911" accession="ERS11140879">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140879</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538911</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.2840</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.92745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_2840_length_27994_cov_5.684887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_2840_length_27994_cov_5.684887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745403_virus.597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538912" accession="ERS11140880">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140880</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538912</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.3708</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_3708_length_23682_cov_9.269773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_3708_length_23682_cov_9.269773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.3531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538913" accession="ERS11140881">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140881</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538913</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.4600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.9038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_4600_length_20837_cov_5.043786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_4600_length_20837_cov_5.043786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.4600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538914" accession="ERS11140882">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140882</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538914</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_675_length_64588_cov_11.971307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_675_length_64588_cov_11.971307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538915" accession="ERS11140883">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140883</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538915</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738586_virus.895</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738586.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738586) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567843) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_895_length_55021_cov_8.786492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4565217391304347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_6_1527__NODE_895_length_55021_cov_8.786492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538916" accession="ERS11140884">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140884</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538916</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_provirus.1897</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08243392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1897_length_44521_cov_9.138196_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1897_length_44521_cov_9.138196_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738271_bin.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp002296965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_provirus.1897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538917" accession="ERS11140885">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140885</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538917</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_provirus.371</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_371_length_106676_cov_10.530239_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738587_bin.244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_371_length_106676_cov_10.530239_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_provirus.371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538918" accession="ERS11140886">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140886</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538918</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_provirus.705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.2881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_705_length_76378_cov_33.599953_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7407407407407407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_705_length_76378_cov_33.599953_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_provirus.705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538919" accession="ERS11140887">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140887</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538919</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.1144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08986784140969165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.9502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1144_length_59343_cov_27.151368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8648648648648649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1144_length_59343_cov_27.151368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.1144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538920" accession="ERS11140888">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140888</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538920</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.1462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1462_length_52219_cov_10.551341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1462_length_52219_cov_10.551341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-127;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.1462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538921" accession="ERS11140889">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140889</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538921</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.1802</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1802_length_45826_cov_10.335046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6578947368421053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1802_length_45826_cov_10.335046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_provirus.1539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538922" accession="ERS11140890">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140890</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538922</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.1973</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.62319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1973_length_43432_cov_4.860616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_1973_length_43432_cov_4.860616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746349_bin.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__UMGS1883;f__UMGS1883;g__UMGS1883;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.1973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538923" accession="ERS11140891">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140891</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538923</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.2254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.7983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2254_length_40162_cov_12.498865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2254_length_40162_cov_12.498865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Romboutsia;s__Romboutsia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.2254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538924" accession="ERS11140892">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140892</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538924</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.2443</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.80591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2443_length_38164_cov_4.234857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2443_length_38164_cov_4.234857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.2094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538925" accession="ERS11140893">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140893</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538925</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.260</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_260_length_127234_cov_10.381387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_260_length_127234_cov_10.381387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538926" accession="ERS11140894">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140894</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538926</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.2847</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.8049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2847_length_34485_cov_29.259591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_2847_length_34485_cov_29.259591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME011266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;s__UMGS1696 sp900554225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.2847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538927" accession="ERS11140895">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140895</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538927</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.3240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_3240_length_31749_cov_6.193420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_3240_length_31749_cov_6.193420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.2306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538928" accession="ERS11140896">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140896</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538928</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.3884</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Henuseptimavirus virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0323237885462555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_3884_length_27928_cov_5.828337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5588235294117647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_3884_length_27928_cov_5.828337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0312761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Drexlerviridae; Tempevirinae; Henuseptimavirus; unclassified Henuseptimavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538929" accession="ERS11140897">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140897</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538929</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.4431</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05250921161364957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_4431_length_25532_cov_9.703202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_4431_length_25532_cov_9.703202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738277_bin.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger sp900554145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538930" accession="ERS11140898">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140898</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538930</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.555</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_555_length_86763_cov_8.478612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_555_length_86763_cov_8.478612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738178_virus.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538931" accession="ERS11140899">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140899</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538931</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.7008</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.46888544708081953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.51487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_7008_length_18104_cov_4.869751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_7008_length_18104_cov_4.869751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.7008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538932" accession="ERS11140900">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140900</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538932</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738587_virus.869</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738587.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738587) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567913) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_869_length_68264_cov_10.043733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_20_1653__NODE_869_length_68264_cov_10.043733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538933" accession="ERS11140901">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140901</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538933</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_provirus.1210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.864977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.6975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1210_length_62896_cov_23.858928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1210_length_62896_cov_23.858928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538934" accession="ERS11140902">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140902</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538934</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_provirus.1670</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.30247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.3822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1670_length_51920_cov_23.772062_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1670_length_51920_cov_23.772062_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538935" accession="ERS11140903">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140903</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538935</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_provirus.2581</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2581_length_38195_cov_35.150296_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2581_length_38195_cov_35.150296_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538936" accession="ERS11140904">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140904</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538936</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_provirus.4</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.5071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4_length_773851_cov_29.079685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738588_bin.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4_length_773851_cov_29.079685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_provirus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538937" accession="ERS11140905">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140905</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538937</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_provirus.876</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.5982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_876_length_76172_cov_22.298324_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_876_length_76172_cov_22.298324_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538938" accession="ERS11140906">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140906</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538938</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.11562</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.75814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_11562_length_10772_cov_2.973539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_11562_length_10772_cov_2.973539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.11562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538939" accession="ERS11140907">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140907</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538939</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.1482</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>236.219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1482_length_55861_cov_122.263050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1482_length_55861_cov_122.263050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746757_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;s__UBA1436 sp900541355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.1482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538940" accession="ERS11140908">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140908</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538940</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.1954</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.2543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1954_length_46812_cov_21.283428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_1954_length_46812_cov_21.283428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538941" accession="ERS11140909">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140909</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538941</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.2309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2309_length_41521_cov_17.829119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2309_length_41521_cov_17.829119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME171351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella parvula_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.2309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538942" accession="ERS11140910">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140910</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538942</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.2559</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02619765791341376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.2767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2559_length_38460_cov_30.038481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2559_length_38460_cov_30.038481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_provirus.1044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538943" accession="ERS11140911">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140911</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538943</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.2808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.03718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2808_length_35921_cov_4.822676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_2808_length_35921_cov_4.822676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0352455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538944" accession="ERS11140912">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140912</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538944</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.3055</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.2969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_3055_length_33786_cov_22.716189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_3055_length_33786_cov_22.716189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745432_bin.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RFN20;f__CAG-826;g__UBA733;s__UBA733 sp900767825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.3055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538945" accession="ERS11140913">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140913</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538945</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.371</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_371_length_122069_cov_29.822611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_371_length_122069_cov_29.822611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738630_bin.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538946" accession="ERS11140914">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140914</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538946</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.4402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Yongloolinvirus virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.43283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4402_length_25192_cov_3.845670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4402_length_25192_cov_3.845670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.4402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Yongloolinvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538947" accession="ERS11140915">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140915</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538947</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.4971</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07564699712046227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4971_length_22614_cov_18.951147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_4971_length_22614_cov_18.951147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.4971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538948" accession="ERS11140916">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140916</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538948</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738588_virus.6601</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561697) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_6601_length_17778_cov_10.500141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_4_1196__NODE_6601_length_17778_cov_10.500141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Slackia_A;s__Slackia_A isoflavoniconvertens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.6601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538949" accession="ERS11140917">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140917</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538949</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_provirus.1013</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1013_length_58073_cov_16.901459_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1013_length_58073_cov_16.901459_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_provirus.1013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538950" accession="ERS11140918">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140918</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538950</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_provirus.1818</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0920154185022027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1818_length_40216_cov_8.134283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738591_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1818_length_40216_cov_8.134283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_provirus.1818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538951" accession="ERS11140919">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140919</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538951</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_provirus.653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>102.902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_653_length_75457_cov_63.367127_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_653_length_75457_cov_63.367127_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME150720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp003526955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_provirus.653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538952" accession="ERS11140920">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140920</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538952</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.1158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1158_length_53251_cov_4.575150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1158_length_53251_cov_4.575150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME184321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.1158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538953" accession="ERS11140921">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140921</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538953</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.1520</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>199.463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1520_length_44862_cov_116.829608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738591_bin.222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1520_length_44862_cov_116.829608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.1976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538954" accession="ERS11140922">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140922</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538954</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.1628</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2503.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1628_length_42751_cov_1449.825186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1628_length_42751_cov_1449.825186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.1628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538955" accession="ERS11140923">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140923</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538955</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.1699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>355.048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1699_length_41852_cov_208.359186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1699_length_41852_cov_208.359186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.1699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538956" accession="ERS11140924">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140924</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538956</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.1851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.9894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1851_length_39715_cov_33.249306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_1851_length_39715_cov_33.249306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruthenibacterium;s__Ruthenibacterium lactatiformans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.1851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538957" accession="ERS11140925">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140925</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538957</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_197_length_144752_cov_101.775048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_197_length_144752_cov_101.775048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__HGM11416;s__HGM11416 sp900766495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538958" accession="ERS11140926">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140926</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538958</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.2159</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.6389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2159_length_36377_cov_49.679559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2159_length_36377_cov_49.679559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.2159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538959" accession="ERS11140927">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140927</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538959</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.2348</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.6721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2348_length_34283_cov_18.906625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2348_length_34283_cov_18.906625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.2348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538960" accession="ERS11140928">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140928</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538960</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.2729</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2729_length_30992_cov_6.340417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_2729_length_30992_cov_6.340417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.2729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538961" accession="ERS11140929">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140929</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538961</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.325</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.7707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_325_length_108084_cov_17.306313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5686274509803921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_325_length_108084_cov_17.306313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538962" accession="ERS11140930">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140930</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538962</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.42</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>204.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_42_length_257669_cov_126.627605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_42_length_257669_cov_126.627605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538963" accession="ERS11140931">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140931</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538963</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.4763</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_4763_length_20577_cov_8.393317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738591_bin.320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_4763_length_20577_cov_8.393317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.4763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538964" accession="ERS11140932">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140932</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538964</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.5522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1918.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_5522_length_18302_cov_1120.129931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_5522_length_18302_cov_1120.129931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.5522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538965" accession="ERS11140933">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140933</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538965</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.6668</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_6668_length_15931_cov_12.574240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_6668_length_15931_cov_12.574240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.6668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538966" accession="ERS11140934">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140934</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538966</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738591_virus.943</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738591.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738591) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560351) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_943_length_60576_cov_6.265674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_7_1773__NODE_943_length_60576_cov_6.265674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738240_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__Firm-11;s__Firm-11 sp900548145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_virus.943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538967" accession="ERS11140935">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140935</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538967</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_provirus.180</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.4432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_180_length_151537_cov_14.575261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738592_bin.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_180_length_151537_cov_14.575261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_provirus.172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538968" accession="ERS11140936">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140936</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538968</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_provirus.444</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.31762027361593803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.2496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_444_length_91757_cov_42.032101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_444_length_91757_cov_42.032101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0295015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538969" accession="ERS11140937">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140937</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538969</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_provirus.74</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>168.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_74_length_218151_cov_96.109770_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738592_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_74_length_218151_cov_96.109770_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME249912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Megasphaeraceae;g__Megasphaera;s__Megasphaera sp000417505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_provirus.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538970" accession="ERS11140938">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140938</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538970</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.1035</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>98.8137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1035_length_50296_cov_70.590553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1035_length_50296_cov_70.590553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.1035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538971" accession="ERS11140939">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140939</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538971</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.1274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.23073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1274_length_43920_cov_4.496887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1274_length_43920_cov_4.496887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Mitsuokella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.1274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538972" accession="ERS11140940">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140940</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538972</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.1705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.9588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1705_length_34975_cov_21.033870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_1705_length_34975_cov_21.033870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0289566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538973" accession="ERS11140941">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140941</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538973</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.2269</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_2269_length_27512_cov_7.972845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_2269_length_27512_cov_7.972845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.2269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538974" accession="ERS11140942">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140942</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538974</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.491</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.8714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_491_length_86784_cov_59.874324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6052631578947368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_491_length_86784_cov_59.874324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538975" accession="ERS11140943">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140943</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538975</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738592_virus.9681</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Inoviridae virus assembled from ERR7738592.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15236784140969165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738592) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_9681_length_7172_cov_10.820155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_21_THA1063YZ__NODE_9681_length_7172_cov_10.820155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.9812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; unclassified Inoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538976" accession="ERS11140944">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140944</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538976</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_provirus.2</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_2_length_615939_cov_17.474767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738593_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.717391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_2_length_615939_cov_17.474767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_provirus.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538977" accession="ERS11140945">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140945</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538977</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_provirus.544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_544_length_76905_cov_7.344900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_544_length_76905_cov_7.344900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538978" accession="ERS11140946">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140946</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538978</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_provirus.890</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_890_length_58355_cov_10.013092_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_890_length_58355_cov_10.013092_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.1639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538979" accession="ERS11140947">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140947</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538979</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.1326</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>238.068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1326_length_45727_cov_139.337963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1326_length_45727_cov_139.337963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.1326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538980" accession="ERS11140948">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140948</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538980</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.1454</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1454_length_43297_cov_6.671356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7297297297297297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1454_length_43297_cov_6.671356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.1454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538981" accession="ERS11140949">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140949</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538981</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.1730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1730_length_38318_cov_10.982453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_1730_length_38318_cov_10.982453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.1730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538982" accession="ERS11140950">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140950</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538982</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.2286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03869765791341377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.3419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_2286_length_31606_cov_31.338038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_2286_length_31606_cov_31.338038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538983" accession="ERS11140951">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140951</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538983</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>192.264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_327_length_100390_cov_110.309721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.990990990990991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_327_length_100390_cov_110.309721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538984" accession="ERS11140952">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140952</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538984</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.5557</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.82834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_5557_length_16572_cov_3.051046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738593_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_5557_length_16572_cov_3.051046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0278013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538985" accession="ERS11140953">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140953</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538985</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738593_virus.860</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738593.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.12377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738593) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560155) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_860_length_59526_cov_4.547965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_16_2189__NODE_860_length_59526_cov_4.547965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0360712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538986" accession="ERS11140954">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140954</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538986</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_provirus.11492</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Microviridae provirus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.90562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_11492_length_9580_cov_5.565506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738594_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_11492_length_9580_cov_5.565506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738594_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-314;g__CAG-1435;s__CAG-1435 sp900769665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_provirus.2959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538987" accession="ERS11140955">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140955</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538987</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_provirus.2065</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2065_length_40619_cov_11.980193_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2065_length_40619_cov_11.980193_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538988" accession="ERS11140956">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140956</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538988</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_provirus.488</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_488_length_98079_cov_6.224975_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738594_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_488_length_98079_cov_6.224975_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538989" accession="ERS11140957">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140957</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538989</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_provirus.779</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_779_length_75063_cov_19.239538_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_779_length_75063_cov_19.239538_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538990" accession="ERS11140958">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140958</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538990</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.1170</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1170_length_58991_cov_18.976101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1170_length_58991_cov_18.976101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.1796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538991" accession="ERS11140959">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140959</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538991</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.1379</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>308.823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1379_length_52402_cov_174.229431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1379_length_52402_cov_174.229431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_virus.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538992" accession="ERS11140960">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140960</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538992</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.1522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>119.813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1522_length_49343_cov_64.989689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1522_length_49343_cov_64.989689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538993" accession="ERS11140961">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140961</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538993</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.4445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_178_length_155160_cov_34.010240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_178_length_155160_cov_34.010240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538994" accession="ERS11140962">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140962</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538994</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.1869</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1869_length_43253_cov_69.632296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_1869_length_43253_cov_69.632296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME172369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-917;g__CAG-349;s__CAG-349 sp003539515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538995" accession="ERS11140963">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140963</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538995</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.2090</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.2416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2090_length_40370_cov_16.097610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2090_length_40370_cov_16.097610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.2090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538996" accession="ERS11140964">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140964</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538996</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.22712</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18068787764054067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.3454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_22712_length_5205_cov_14.327223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_22712_length_5205_cov_14.327223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.22712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538997" accession="ERS11140965">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140965</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538997</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.248</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.0633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_248_length_137919_cov_25.017680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_248_length_137919_cov_25.017680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538998" accession="ERS11140966">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140966</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538998</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.2785</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.88783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2785_length_32610_cov_5.369778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_2785_length_32610_cov_5.369778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13538999" accession="ERS11140967">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140967</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13538999</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.3053</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13538999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.75689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_3053_length_30560_cov_4.229538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9629629629629628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_3053_length_30560_cov_4.229538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.3053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539000" accession="ERS11140968">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140968</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539000</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03326220277569512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_360_length_115485_cov_8.981942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_360_length_115485_cov_8.981942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539001" accession="ERS11140969">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140969</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539001</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.4206</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05905891240983968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.2056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_4206_length_23647_cov_65.063683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_4206_length_23647_cov_65.063683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_provirus.1808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539002" accession="ERS11140970">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140970</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539002</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_452_length_102504_cov_8.674851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9777777777777776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_452_length_102504_cov_8.674851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539003" accession="ERS11140971">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140971</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539003</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.4991</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12488986784140972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_4991_length_20502_cov_16.049547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_4991_length_20502_cov_16.049547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738580_bin.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS1668;s__UMGS1668 sp900553955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.6153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539004" accession="ERS11140972">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140972</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539004</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.6079</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_6079_length_17155_cov_7.780712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_6079_length_17155_cov_7.780712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.4536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539005" accession="ERS11140973">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140973</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539005</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738594_virus.849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>129.648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561686) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_849_length_71542_cov_71.168922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_O_6_1198__NODE_849_length_71542_cov_71.168922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539006" accession="ERS11140974">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140974</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539006</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_provirus.1597</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1597_length_41406_cov_17.455419_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1597_length_41406_cov_17.455419_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_C;s__Ruminococcus_C sp000433635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539007" accession="ERS11140975">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140975</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539007</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_provirus.612</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.3498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_612_length_75126_cov_11.336833_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738595_bin.271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_612_length_75126_cov_11.336833_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__Olsenella_E sp003609875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.1794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539008" accession="ERS11140976">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140976</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539008</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.1126</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.1247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1126_length_52548_cov_32.451659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1126_length_52548_cov_32.451659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539009" accession="ERS11140977">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140977</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539009</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.1378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.659300000000002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1378_length_45791_cov_11.951131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1378_length_45791_cov_11.951131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.1378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539010" accession="ERS11140978">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140978</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539010</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.1671</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1671_length_40224_cov_11.198097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1671_length_40224_cov_11.198097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.1671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539011" accession="ERS11140979">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140979</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539011</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.1887</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1887_length_37045_cov_13.134251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_1887_length_37045_cov_13.134251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.2098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539012" accession="ERS11140980">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140980</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539012</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.2120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.6014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_2120_length_34110_cov_23.972674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_2120_length_34110_cov_23.972674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_provirus.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539013" accession="ERS11140981">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140981</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539013</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.2462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.31283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_2462_length_30668_cov_5.169037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_2462_length_30668_cov_5.169037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.2769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539014" accession="ERS11140982">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140982</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539014</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.3112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_3112_length_25730_cov_19.091334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_3112_length_25730_cov_19.091334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.3112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539015" accession="ERS11140983">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140983</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539015</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.3808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.81743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_3808_length_22295_cov_5.028130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_3808_length_22295_cov_5.028130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738561_bin.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900066305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.4207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539016" accession="ERS11140984">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140984</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539016</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_543_length_80774_cov_14.102098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_543_length_80774_cov_14.102098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539017" accession="ERS11140985">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140985</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539017</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.6527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.8177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_6527_length_15081_cov_34.961744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_6527_length_15081_cov_34.961744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__Dorea formicigenerans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.7258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539018" accession="ERS11140986">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140986</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539018</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738595_virus.775</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738595.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738595) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560321) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_775_length_66058_cov_5.882057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_21_1740__NODE_775_length_66058_cov_5.882057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539019" accession="ERS11140987">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140987</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539019</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_provirus.1345</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1345_length_54340_cov_9.182666_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738596_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1345_length_54340_cov_9.182666_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;s__Anaerobutyricum hallii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_provirus.1345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539020" accession="ERS11140988">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140988</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539020</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_provirus.225</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus provirus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_225_length_133866_cov_8.915785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738596_bin.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_225_length_133866_cov_8.915785_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_provirus.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539021" accession="ERS11140989">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140989</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539021</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_provirus.455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.9949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_455_length_96004_cov_22.073368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738596_bin.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_455_length_96004_cov_22.073368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539022" accession="ERS11140990">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140990</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539022</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.1005</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.6786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1005_length_63491_cov_21.744378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9318181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1005_length_63491_cov_21.744378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME192801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp000432135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745881_virus.550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539023" accession="ERS11140991">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140991</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539023</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.1407</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1407_length_52817_cov_52.874251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1407_length_52817_cov_52.874251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.1407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539024" accession="ERS11140992">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140992</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539024</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.1615</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2138578943106438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3501.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1615_length_49126_cov_1960.410039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_1615_length_49126_cov_1960.410039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.1720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539025" accession="ERS11140993">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140993</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539025</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.2158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.5212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_2158_length_41548_cov_15.924140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_2158_length_41548_cov_15.924140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539026" accession="ERS11140994">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140994</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539026</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.2447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_2447_length_38713_cov_7.327234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_2447_length_38713_cov_7.327234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.2447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539027" accession="ERS11140995">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140995</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539027</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.3036</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>174.293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_3036_length_33678_cov_101.577304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_3036_length_33678_cov_101.577304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.3036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539028" accession="ERS11140996">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140996</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539028</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.3493</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.2722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_3493_length_30852_cov_8.154703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_3493_length_30852_cov_8.154703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.3493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539029" accession="ERS11140997">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140997</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539029</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.4271</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04000921161364958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.50398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_4271_length_26935_cov_5.319086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_4271_length_26935_cov_5.319086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738277_bin.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger sp900554145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.4271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539030" accession="ERS11140998">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140998</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539030</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738596_virus.7438</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>148.486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561093) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_7438_length_18327_cov_85.957479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_21_1577__NODE_7438_length_18327_cov_85.957479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.3878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539031" accession="ERS11140999">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11140999</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539031</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_provirus.119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18953744493392077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>525.977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_119_length_218523_cov_295.050287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738597_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_119_length_218523_cov_295.050287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-245;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738597_provirus.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539032" accession="ERS11141000">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141000</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539032</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_provirus.2148</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2148_length_38371_cov_9.491200_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738597_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2148_length_38371_cov_9.491200_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738562_provirus.1508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539033" accession="ERS11141001">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141001</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539033</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_provirus.34</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cbastvirus provirus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.7252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_34_length_332261_cov_20.906109_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738597_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_34_length_332261_cov_20.906109_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738597_provirus.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cbastvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539034" accession="ERS11141002">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141002</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539034</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_provirus.770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>105.937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_770_length_87355_cov_64.447260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6888888888888889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_770_length_87355_cov_64.447260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0342940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539035" accession="ERS11141003">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141003</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539035</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.1751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.95152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_1751_length_46290_cov_4.360937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_1751_length_46290_cov_4.360937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0341914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539036" accession="ERS11141004">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141004</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539036</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.1988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>181.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_1988_length_41340_cov_106.493275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_1988_length_41340_cov_106.493275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME283914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738597_virus.1988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539037" accession="ERS11141005">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141005</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539037</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.2274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05958149779735685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.6132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2274_length_36463_cov_55.866295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2274_length_36463_cov_55.866295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.1602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539038" accession="ERS11141006">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141006</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539038</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.2876</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.3019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2876_length_29465_cov_21.054580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5757575757575758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_2876_length_29465_cov_21.054580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0311326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539039" accession="ERS11141007">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141007</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539039</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.3932</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_3932_length_20789_cov_31.277762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_3932_length_20789_cov_31.277762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0264879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539040" accession="ERS11141008">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141008</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539040</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738597_virus.928</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phage DP SC_6_H4_2017 virus assembled from ERR7738597.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>197.029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738597) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558864) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_928_length_77657_cov_107.628384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5303030303030303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_21_7002__NODE_928_length_77657_cov_107.628384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Barnesiellaceae;g__Barnesiella;s__Barnesiella intestinihominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0351244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Phage DP SC_6_H4_2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539041" accession="ERS11141009">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141009</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539041</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_provirus.163</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2123898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>137.544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_163_length_163269_cov_80.650430_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738598_bin.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_163_length_163269_cov_80.650430_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539042" accession="ERS11141010">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141010</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539042</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_provirus.2312</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2312_length_40751_cov_7.754020_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738598_bin.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2312_length_40751_cov_7.754020_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_provirus.2312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539043" accession="ERS11141011">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141011</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539043</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_provirus.5471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_5471_length_22742_cov_8.768454_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738598_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_5471_length_22742_cov_8.768454_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.2722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539044" accession="ERS11141012">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141012</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539044</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03714174109185703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_111_length_192926_cov_6.550254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5409836065573771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_111_length_192926_cov_6.550254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539045" accession="ERS11141013">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141013</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539045</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.12691</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02349942002795121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_12691_length_12186_cov_14.292995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_12691_length_12186_cov_14.292995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.12691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539046" accession="ERS11141014">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141014</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539046</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.1545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>231.728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_1545_length_53217_cov_128.847309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_1545_length_53217_cov_128.847309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME155972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Pasteurellaceae;g__Haemophilus_D;s__Haemophilus_D parainfluenzae_K</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.1545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539047" accession="ERS11141015">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141015</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539047</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.1876</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.8003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_1876_length_46745_cov_42.710423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6944444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_1876_length_46745_cov_42.710423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.1876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539048" accession="ERS11141016">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141016</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539048</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.2136</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.4975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2136_length_43011_cov_30.289537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2136_length_43011_cov_30.289537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME157359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;s__UBA3388 sp900545215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539049" accession="ERS11141017">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141017</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539049</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.2286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2286_length_41089_cov_7.167341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2286_length_41089_cov_7.167341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.2286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539050" accession="ERS11141018">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141018</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539050</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.2552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Psavirus virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.0987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2552_length_37997_cov_38.875580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2552_length_37997_cov_38.875580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.2552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Psavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539051" accession="ERS11141019">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141019</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539051</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.2855</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.4854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2855_length_35490_cov_38.409482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_2855_length_35490_cov_38.409482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539052" accession="ERS11141020">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141020</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539052</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.32059</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_32059_length_5648_cov_6.903787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_32059_length_5648_cov_6.903787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.32059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539053" accession="ERS11141021">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141021</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539053</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.3564</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>105.955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_3564_length_30818_cov_56.823916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_3564_length_30818_cov_56.823916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.3564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539054" accession="ERS11141022">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141022</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539054</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.4627</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.42184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_4627_length_25603_cov_4.130024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_4627_length_25603_cov_4.130024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.4627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539055" accession="ERS11141023">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141023</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539055</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.5467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11569612348054097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_5467_length_22752_cov_6.700728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_5467_length_22752_cov_6.700728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0341385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539056" accession="ERS11141024">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141024</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539056</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.59647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_685_length_83795_cov_8.183963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_685_length_83795_cov_8.183963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539057" accession="ERS11141025">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141025</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539057</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738598_virus.849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738598.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738598) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560385) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_849_length_74736_cov_18.612558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_17_1778__NODE_849_length_74736_cov_18.612558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539058" accession="ERS11141026">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141026</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539058</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_provirus.1471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona provirus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.5064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1471_length_50526_cov_12.601974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9552238805970148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1471_length_50526_cov_12.601974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_virus.291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539059" accession="ERS11141027">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141027</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539059</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_provirus.290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_290_length_118488_cov_10.126340_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_290_length_118488_cov_10.126340_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_provirus.290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539060" accession="ERS11141028">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141028</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539060</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_provirus.549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_549_length_88671_cov_9.574903_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_549_length_88671_cov_9.574903_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539061" accession="ERS11141029">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141029</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539061</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_provirus.987</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_987_length_63119_cov_5.688160_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738599_bin.136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_987_length_63119_cov_5.688160_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539062" accession="ERS11141030">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141030</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539062</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.1106</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1106_length_59389_cov_6.877883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1106_length_59389_cov_6.877883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539063" accession="ERS11141031">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141031</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539063</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.1377</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.9736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1377_length_52640_cov_21.271693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1377_length_52640_cov_21.271693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539064" accession="ERS11141032">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141032</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539064</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.1617</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1617_length_47669_cov_5.994159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5490196078431373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1617_length_47669_cov_5.994159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539065" accession="ERS11141033">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141033</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539065</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.1881</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:13Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1881_length_43199_cov_6.858518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_1881_length_43199_cov_6.858518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539066" accession="ERS11141034">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141034</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539066</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.2098</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_2098_length_40145_cov_15.854422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_2098_length_40145_cov_15.854422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.2098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539067" accession="ERS11141035">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141035</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539067</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.238</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_238_length_132279_cov_18.097608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5957446808510638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_238_length_132279_cov_18.097608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539068" accession="ERS11141036">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141036</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539068</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.2586</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.8741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_2586_length_34545_cov_40.587385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_2586_length_34545_cov_40.587385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539069" accession="ERS11141037">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141037</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539069</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.3635</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19297827089674777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:26Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_3635_length_27107_cov_15.332149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_3635_length_27107_cov_15.332149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.3635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539070" accession="ERS11141038">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141038</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539070</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.4496</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.24484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_4496_length_23055_cov_4.782662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_4496_length_23055_cov_4.782662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746739_virus.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539071" accession="ERS11141039">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141039</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539071</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.526</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>216.872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_526_length_90834_cov_119.326851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_526_length_90834_cov_119.326851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539072" accession="ERS11141040">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141040</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539072</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738599_virus.768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738599.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>417.264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738599) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560520) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_768_length_72856_cov_242.696190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_14_1915__NODE_768_length_72856_cov_242.696190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539073" accession="ERS11141041">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141041</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539073</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_provirus.148</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.4663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_148_length_142957_cov_46.530676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738600_bin.253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_148_length_142957_cov_46.530676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0256792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539074" accession="ERS11141042">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141042</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539074</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_provirus.2818</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.93652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_2818_length_25073_cov_4.441871_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_2818_length_25073_cov_4.441871_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0365260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539075" accession="ERS11141043">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141043</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539075</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_provirus.72</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.4599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_72_length_189808_cov_16.794441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7246376811594203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_72_length_189808_cov_16.794441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_provirus.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539076" accession="ERS11141044">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141044</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539076</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.115</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_115_length_159495_cov_8.866107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7285714285714285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_115_length_159495_cov_8.866107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539077" accession="ERS11141045">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141045</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539077</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.1385</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.2133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1385_length_38844_cov_14.033430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1385_length_38844_cov_14.033430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539078" accession="ERS11141046">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141046</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539078</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.1533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1533_length_36528_cov_8.189021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1533_length_36528_cov_8.189021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME158868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900549635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539079" accession="ERS11141047">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141047</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539079</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.1731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.64549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1731_length_34067_cov_4.394851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738600_bin.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_1731_length_34067_cov_4.394851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738571_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__Zag1;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539080" accession="ERS11141048">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141048</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539080</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.2133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_2133_length_29875_cov_9.687999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_2133_length_29875_cov_9.687999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.2503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539081" accession="ERS11141049">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141049</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539081</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.26</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_26_length_257472_cov_13.857344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.515625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_26_length_257472_cov_13.857344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539082" accession="ERS11141050">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141050</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539082</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.3430</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04316671142296685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_3430_length_22207_cov_11.177542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_3430_length_22207_cov_11.177542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_C;s__Ruminococcus_C sp000433635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_provirus.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539083" accession="ERS11141051">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141051</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539083</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.4379</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_4379_length_18836_cov_11.662455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_4379_length_18836_cov_11.662455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738198_virus.716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539084" accession="ERS11141052">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141052</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539084</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.673</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7649779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_673_length_60283_cov_27.181261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_673_length_60283_cov_27.181261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539085" accession="ERS11141053">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141053</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539085</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738600_virus.9157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738600.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.2707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738600) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567950) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_9157_length_11138_cov_25.966741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_12_1518__NODE_9157_length_11138_cov_25.966741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.9157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539086" accession="ERS11141054">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141054</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539086</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_provirus.1567</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.2179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1567_length_44448_cov_29.731807_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1567_length_44448_cov_29.731807_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539087" accession="ERS11141055">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141055</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539087</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_provirus.290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07750921161364957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.8548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_290_length_118358_cov_35.543435_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738601_bin.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_290_length_118358_cov_35.543435_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.1761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539088" accession="ERS11141056">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141056</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539088</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_provirus.39</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_39_length_271971_cov_12.246835_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738601_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_39_length_271971_cov_12.246835_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_provirus.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539089" accession="ERS11141057">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141057</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539089</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_provirus.878</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_878_length_64101_cov_10.396804_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738601_bin.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_878_length_64101_cov_10.396804_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_provirus.878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539090" accession="ERS11141058">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141058</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539090</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.1157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1157_length_53855_cov_16.839916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1157_length_53855_cov_16.839916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.1357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539091" accession="ERS11141059">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141059</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539091</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.1453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1453_length_46592_cov_10.828055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.935483870967742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1453_length_46592_cov_10.828055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539092" accession="ERS11141060">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141060</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539092</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.1822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>176.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1822_length_40268_cov_109.030703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1822_length_40268_cov_109.030703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539093" accession="ERS11141061">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141061</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539093</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.1906</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.2612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1906_length_39119_cov_45.922314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_1906_length_39119_cov_45.922314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.3103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539094" accession="ERS11141062">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141062</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539094</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.2101</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07359817362478667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2101_length_36470_cov_22.527711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738601_bin.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2101_length_36470_cov_22.527711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.2101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539095" accession="ERS11141063">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141063</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539095</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.2278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2278_length_34351_cov_56.751298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9310344827586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2278_length_34351_cov_56.751298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnoclostridium_B;s__Lachnoclostridium_B sp900066555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.2278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539096" accession="ERS11141064">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141064</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539096</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.2630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2630_length_31121_cov_6.169244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_2630_length_31121_cov_6.169244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738222_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__CAG-269 sp900556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.2630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539097" accession="ERS11141065">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141065</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539097</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.33</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.2547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_33_length_294248_cov_17.110473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5272727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_33_length_294248_cov_17.110473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539098" accession="ERS11141066">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141066</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539098</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.3807</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08312057094440387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.70758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_3807_length_23469_cov_3.637868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_3807_length_23469_cov_3.637868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.1505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539099" accession="ERS11141067">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141067</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539099</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.4233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.2898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_4233_length_21625_cov_30.433729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_4233_length_21625_cov_30.433729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp001916075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.4233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539100" accession="ERS11141068">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141068</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539100</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.5029</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06720782038701123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_5029_length_18895_cov_13.525986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_5029_length_18895_cov_13.525986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.2701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539101" accession="ERS11141069">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141069</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539101</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_716_length_71840_cov_10.507741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6170212765957447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_716_length_71840_cov_10.507741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539102" accession="ERS11141070">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141070</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539102</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738601_virus.891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738601.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>348.449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738601) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_891_length_63605_cov_200.202981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4888888888888889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_16_1933__NODE_891_length_63605_cov_200.202981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745858_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Anaerotignaceae;g__UMGS1670;s__UMGS1670 sp900553995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539103" accession="ERS11141071">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141071</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539103</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_provirus.1554</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1554_length_48513_cov_11.381183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1554_length_48513_cov_11.381183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.1554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539104" accession="ERS11141072">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141072</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539104</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_provirus.2182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.0946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2182_length_38898_cov_14.348368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738602_bin.358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2182_length_38898_cov_14.348368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.2182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539105" accession="ERS11141073">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141073</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539105</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_provirus.4383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.2829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_4383_length_24804_cov_24.670522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_4383_length_24804_cov_24.670522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.1581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539106" accession="ERS11141074">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141074</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539106</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_provirus.644</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_644_length_83033_cov_7.812069_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738602_bin.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_644_length_83033_cov_7.812069_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539107" accession="ERS11141075">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141075</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539107</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.1192</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.1746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1192_length_57417_cov_24.009383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9318181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1192_length_57417_cov_24.009383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.1271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539108" accession="ERS11141076">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141076</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539108</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.1448</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1448_length_50804_cov_6.072900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1448_length_50804_cov_6.072900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.1328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539109" accession="ERS11141077">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141077</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539109</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.1935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1935_length_42398_cov_13.980104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_1935_length_42398_cov_13.980104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;s__UMGS973 sp900547295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.1935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539110" accession="ERS11141078">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141078</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539110</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.2263</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.3169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2263_length_38085_cov_20.040465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2263_length_38085_cov_20.040465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.2263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539111" accession="ERS11141079">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141079</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539111</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.2566</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.5798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2566_length_35418_cov_24.831272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_2566_length_35418_cov_24.831272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;s__CAG-488 sp000434055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.2566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539112" accession="ERS11141080">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141080</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539112</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.3393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.4173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_3393_length_29407_cov_17.950631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738602_bin.251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_3393_length_29407_cov_17.950631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.3044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539113" accession="ERS11141081">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141081</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539113</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.4635</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_4635_length_23929_cov_8.409274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_4635_length_23929_cov_8.409274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.2423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539114" accession="ERS11141082">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141082</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539114</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.6175</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_6175_length_19589_cov_6.409338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_6175_length_19589_cov_6.409338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_provirus.652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539115" accession="ERS11141083">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141083</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539115</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738602_virus.942</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738602.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738602) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567849) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_942_length_66422_cov_15.028849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_4_1526__NODE_942_length_66422_cov_15.028849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0361518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539116" accession="ERS11141084">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141084</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539116</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_provirus.1457</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Eganvirus provirus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>214.483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1457_length_44074_cov_119.298907_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5952380952380952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1457_length_44074_cov_119.298907_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.1457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Peduovirinae; Eganvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539117" accession="ERS11141085">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141085</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539117</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_provirus.295</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.2931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_295_length_121500_cov_60.316999_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738603_bin.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_295_length_121500_cov_60.316999_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0340593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539118" accession="ERS11141086">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141086</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539118</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_provirus.535</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_535_length_87259_cov_5.932647_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738603_bin.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_535_length_87259_cov_5.932647_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539119" accession="ERS11141087">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141087</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539119</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_provirus.900</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_900_length_62726_cov_8.732558_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738603_bin.176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_900_length_62726_cov_8.732558_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539120" accession="ERS11141088">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141088</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539120</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_virus.1494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vectrevirus virus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00128438135477076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1494_length_43407_cov_5.535033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7884615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1494_length_43407_cov_5.535033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.1494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Molineuxvirinae; Vectrevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539121" accession="ERS11141089">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141089</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539121</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_virus.1891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06265694085351817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1891_length_35972_cov_5.692771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_1891_length_35972_cov_5.692771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746794_bin.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Micrococcaceae;g__Rothia;s__Rothia sp902373285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.1891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539122" accession="ERS11141090">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141090</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539122</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_virus.2746</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17230176211453746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.1702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_2746_length_26917_cov_37.166542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_2746_length_26917_cov_37.166542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0261292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539123" accession="ERS11141091">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141091</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539123</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_virus.369</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_369_length_107447_cov_15.884251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_369_length_107447_cov_15.884251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-273;s__CAG-273 sp003534295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539124" accession="ERS11141092">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141092</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539124</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738603_virus.5316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>219.446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560031) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_5316_length_14654_cov_123.774508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_19_2149__NODE_5316_length_14654_cov_123.774508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A sp900066205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.8840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539125" accession="ERS11141093">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141093</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539125</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_provirus.1342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05270140052539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.2765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1342_length_45737_cov_35.011870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738604_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1342_length_45737_cov_35.011870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539126" accession="ERS11141094">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141094</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539126</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_provirus.212</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0378845907965548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.0788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_212_length_138333_cov_19.865677_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738604_bin.249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5142857142857142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_212_length_138333_cov_19.865677_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746349_bin.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-95;s__CAG-95 sp000436115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539127" accession="ERS11141095">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141095</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539127</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_provirus.695</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_695_length_71838_cov_10.428185_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_695_length_71838_cov_10.428185_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_provirus.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539128" accession="ERS11141096">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141096</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539128</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.1119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.2386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1119_length_52141_cov_23.833897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1119_length_52141_cov_23.833897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539129" accession="ERS11141097">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141097</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539129</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.1301</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1301_length_46940_cov_44.452446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1301_length_46940_cov_44.452446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539130" accession="ERS11141098">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141098</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539130</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.1558</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>166.464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1558_length_41664_cov_95.313608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6176470588235294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1558_length_41664_cov_95.313608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_virus.1558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539131" accession="ERS11141099">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141099</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539131</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.1833</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>200.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1833_length_37050_cov_113.109458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_1833_length_37050_cov_113.109458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539132" accession="ERS11141100">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141100</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539132</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.2253</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13243392070484583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.05109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_2253_length_31977_cov_5.413480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_2253_length_31977_cov_5.413480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.1046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539133" accession="ERS11141101">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141101</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539133</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.2687</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0880538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.1653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_2687_length_28184_cov_53.362685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_2687_length_28184_cov_53.362685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539134" accession="ERS11141102">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141102</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539134</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.4158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8574339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_4158_length_20338_cov_9.393613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_4158_length_20338_cov_9.393613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539135" accession="ERS11141103">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141103</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539135</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.591</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:12Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.7682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_591_length_80092_cov_48.567094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7575757575757576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_591_length_80092_cov_48.567094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_virus.591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539136" accession="ERS11141104">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141104</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539136</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738604_virus.894</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738604.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07360955218654153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.4329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738604) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560302) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_894_length_60931_cov_44.082640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_22_2297__NODE_894_length_60931_cov_44.082640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539137" accession="ERS11141105">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141105</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539137</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_provirus.1273</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10914096916299568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.1896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1273_length_51462_cov_27.744848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1273_length_51462_cov_27.744848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.1602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539138" accession="ERS11141106">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141106</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539138</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_provirus.1831</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1831_length_41922_cov_7.409750_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738605_bin.282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1831_length_41922_cov_7.409750_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_provirus.719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539139" accession="ERS11141107">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141107</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539139</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_provirus.343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.802477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_343_length_102139_cov_12.353275_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738605_bin.246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_343_length_102139_cov_12.353275_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738433_provirus.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539140" accession="ERS11141108">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141108</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539140</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_provirus.56</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>715.765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_56_length_202820_cov_538.454990_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738605_bin.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8032786885245902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_56_length_202820_cov_538.454990_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539141" accession="ERS11141109">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141109</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539141</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_provirus.859</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis provirus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2149779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.6561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_859_length_63874_cov_58.645673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_859_length_63874_cov_58.645673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_provirus.859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539142" accession="ERS11141110">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141110</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539142</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.1047</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2261.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1047_length_57889_cov_1315.322113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1047_length_57889_cov_1315.322113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_virus.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539143" accession="ERS11141111">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141111</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539143</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.1397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.4486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1397_length_48503_cov_46.963800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1397_length_48503_cov_46.963800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.1152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539144" accession="ERS11141112">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141112</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539144</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.1673</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18963559754245787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1673_length_44253_cov_15.517747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_1673_length_44253_cov_15.517747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_virus.1673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539145" accession="ERS11141113">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141113</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539145</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.2254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.73597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_2254_length_36506_cov_5.620577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_2254_length_36506_cov_5.620577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.1253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539146" accession="ERS11141114">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141114</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539146</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.2780</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_2780_length_31433_cov_7.856742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9642857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_2780_length_31433_cov_7.856742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539147" accession="ERS11141115">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141115</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539147</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_351_length_101283_cov_8.319388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_351_length_101283_cov_8.319388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738550_virus.509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539148" accession="ERS11141116">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141116</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539148</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.4682</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.1963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_4682_length_21362_cov_4.152549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_4682_length_21362_cov_4.152549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539149" accession="ERS11141117">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141117</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539149</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738605_virus.802</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738605.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738605) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567948) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_802_length_66348_cov_8.593653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8780487804878049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_24_1307__NODE_802_length_66348_cov_8.593653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539150" accession="ERS11141118">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141118</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539150</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_provirus.118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.4046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_118_length_151101_cov_39.360148_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_118_length_151101_cov_39.360148_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_provirus.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539151" accession="ERS11141119">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141119</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539151</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_provirus.152</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_152_length_133641_cov_14.371470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738606_bin.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9838709677419356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_152_length_133641_cov_14.371470_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738530_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium sp900539945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_provirus.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539152" accession="ERS11141120">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141120</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539152</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_provirus.30</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>153.663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_30_length_247433_cov_87.161585_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_30_length_247433_cov_87.161585_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_provirus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539153" accession="ERS11141121">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141121</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539153</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_provirus.579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>200.462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_579_length_75165_cov_115.419681_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738606_bin.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_579_length_75165_cov_115.419681_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539154" accession="ERS11141122">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141122</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539154</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_provirus.960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.8277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_960_length_58273_cov_47.876572_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738606_bin.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_960_length_58273_cov_47.876572_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539155" accession="ERS11141123">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141123</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539155</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.1215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Klebsiella phage P-K7R virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>135.869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1215_length_51750_cov_78.186790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6447368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1215_length_51750_cov_78.186790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME239725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola plebeius_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.1208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Klebsiella phage P-K7R</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539156" accession="ERS11141124">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141124</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539156</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.1483</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1483_length_46513_cov_12.928827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1483_length_46513_cov_12.928827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.1412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539157" accession="ERS11141125">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141125</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539157</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.1693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>344.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1693_length_43152_cov_194.993685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1693_length_43152_cov_194.993685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539158" accession="ERS11141126">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141126</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539158</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.1864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.6457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1864_length_40988_cov_25.732590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738606_bin.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_1864_length_40988_cov_25.732590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738534_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales;f__Monoglobaceae;g__UMGS1820;s__UMGS1820 sp900555375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.1864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539159" accession="ERS11141127">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141127</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539159</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.2093</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2093_length_38448_cov_10.701467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2093_length_38448_cov_10.701467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.2093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539160" accession="ERS11141128">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141128</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539160</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.2566</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2566_length_33914_cov_7.219375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2566_length_33914_cov_7.219375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;s__Anaerobutyricum hallii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.2566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539161" accession="ERS11141129">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141129</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539161</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.2947</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.1157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2947_length_31055_cov_15.394990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_2947_length_31055_cov_15.394990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539162" accession="ERS11141130">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141130</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539162</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.3662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_3662_length_26940_cov_43.686111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_3662_length_26940_cov_43.686111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.3662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539163" accession="ERS11141131">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141131</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539163</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.4459</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24217479228380095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.02313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_4459_length_23674_cov_4.556003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_4459_length_23674_cov_4.556003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539164" accession="ERS11141132">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141132</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539164</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.59361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>4.69001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_59361_length_2834_cov_2.286543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_59361_length_2834_cov_2.286543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp003522105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.59361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539165" accession="ERS11141133">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141133</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539165</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.778</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_778_length_64331_cov_10.602966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_778_length_64331_cov_10.602966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539166" accession="ERS11141134">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141134</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539166</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738606_virus.9394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738606.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738606) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567847) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_9394_length_13951_cov_17.513406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_18_1651__NODE_9394_length_13951_cov_17.513406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.8602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539167" accession="ERS11141135">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141135</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539167</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_provirus.1416</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus provirus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:38Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1416_length_50335_cov_8.525309_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738607_bin.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1416_length_50335_cov_8.525309_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UBA6414;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_provirus.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539168" accession="ERS11141136">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141136</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539168</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_provirus.1885</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>173.712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1885_length_42251_cov_102.252525_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1885_length_42251_cov_102.252525_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_provirus.1885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539169" accession="ERS11141137">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141137</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539169</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_provirus.387</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.7801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_387_length_106011_cov_23.372043_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738607_bin.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_387_length_106011_cov_23.372043_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.1289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539170" accession="ERS11141138">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141138</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539170</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_provirus.797</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0160654993231054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_797_length_72477_cov_7.019171_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738607_bin.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_797_length_72477_cov_7.019171_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UMGS1124;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_provirus.797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539171" accession="ERS11141139">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141139</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539171</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.1043</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>121.779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1043_length_61500_cov_67.430393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9142857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1043_length_61500_cov_67.430393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738591_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UMGS1603;s__UMGS1603 sp900553265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.1043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539172" accession="ERS11141140">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141140</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539172</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.1267</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>105.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1267_length_54096_cov_59.536811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5142857142857142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1267_length_54096_cov_59.536811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.1267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539173" accession="ERS11141141">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141141</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539173</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.1560</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1560_length_47261_cov_7.212572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1560_length_47261_cov_7.212572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.1560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539174" accession="ERS11141142">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141142</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539174</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.1798</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1798_length_43528_cov_52.926883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1798_length_43528_cov_52.926883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539175" accession="ERS11141143">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141143</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539175</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.1976</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.13342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1976_length_41014_cov_5.483279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_1976_length_41014_cov_5.483279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.1976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539176" accession="ERS11141144">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141144</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539176</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.2181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2181_length_38544_cov_11.270180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2181_length_38544_cov_11.270180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.2181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539177" accession="ERS11141145">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141145</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539177</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.2409</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.9989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2409_length_36086_cov_36.437030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2409_length_36086_cov_36.437030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_virus.615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539178" accession="ERS11141146">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141146</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539178</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.2592</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2592_length_34427_cov_5.713013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_2592_length_34427_cov_5.713013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.2592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539179" accession="ERS11141147">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141147</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539179</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.3146</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_3146_length_30192_cov_9.170347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_3146_length_30192_cov_9.170347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.3146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539180" accession="ERS11141148">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141148</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539180</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.4290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.4243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_4290_length_24064_cov_13.430483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_4290_length_24064_cov_13.430483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.4290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539181" accession="ERS11141149">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141149</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539181</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.49745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Ralstonia phage 1 NP-2014 virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15957970773951435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.72401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_49745_length_3295_cov_3.876321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_49745_length_3295_cov_3.876321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.49745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; unclassified Inoviridae; Ralstonia phage 1 NP-2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539182" accession="ERS11141150">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141150</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539182</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.9802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_577_length_86578_cov_52.088646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_577_length_86578_cov_52.088646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539183" accession="ERS11141151">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141151</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539183</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.632</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>358.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_632_length_82638_cov_210.010186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_632_length_82638_cov_210.010186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539184" accession="ERS11141152">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141152</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539184</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.803</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_803_length_72039_cov_19.487480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_803_length_72039_cov_19.487480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539185" accession="ERS11141153">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141153</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539185</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738607_virus.9727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738607.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.9282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738607) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560761) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_9727_length_12905_cov_12.191496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_15_1726__NODE_9727_length_12905_cov_12.191496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.9727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539186" accession="ERS11141154">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141154</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539186</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_provirus.1491</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>282.863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1491_length_59155_cov_169.901875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738608_bin.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1491_length_59155_cov_169.901875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738143_provirus.773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539187" accession="ERS11141155">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141155</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539187</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_provirus.182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.0294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_182_length_188243_cov_62.793608_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_182_length_188243_cov_62.793608_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_provirus.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539188" accession="ERS11141156">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141156</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539188</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_provirus.31</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.8841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_31_length_365069_cov_18.616112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.603448275862069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_31_length_365069_cov_18.616112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539189" accession="ERS11141157">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141157</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539189</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_provirus.559</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_559_length_108783_cov_8.111144_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738608_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_559_length_108783_cov_8.111144_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539190" accession="ERS11141158">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141158</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539190</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.1037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.5861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1037_length_74939_cov_18.432315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1037_length_74939_cov_18.432315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539191" accession="ERS11141159">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141159</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539191</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.1550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.4951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1550_length_57310_cov_4.920745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_1550_length_57310_cov_4.920745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539192" accession="ERS11141160">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141160</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539192</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.6272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_196_length_181453_cov_12.990192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8103448275862069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_196_length_181453_cov_12.990192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_virus.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539193" accession="ERS11141161">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141161</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539193</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.2316</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09273040577452842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_2316_length_42164_cov_10.258108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_2316_length_42164_cov_10.258108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0280225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539194" accession="ERS11141162">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141162</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539194</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.2651</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_2651_length_37875_cov_7.694878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_2651_length_37875_cov_7.694878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_virus.1419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539195" accession="ERS11141163">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141163</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539195</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.3086</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_3086_length_33869_cov_7.282759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8928571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_3086_length_33869_cov_7.282759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_provirus.2663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539196" accession="ERS11141164">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141164</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539196</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.4553</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.5595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_4553_length_24450_cov_22.219915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_4553_length_24450_cov_22.219915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539197" accession="ERS11141165">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141165</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539197</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.6195</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Ceridwen virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.41085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_6195_length_18546_cov_5.007147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_6195_length_18546_cov_5.007147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.4576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Butyrivibrio virus Ceridwen</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539198" accession="ERS11141166">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141166</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539198</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738608_virus.9001</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738608.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2693548984968848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738608) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559868) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_9001_length_13264_cov_9.375066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_17_2476__NODE_9001_length_13264_cov_9.375066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__HGM11416;s__HGM11416 sp900768525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.6311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539199" accession="ERS11141167">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141167</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539199</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_provirus.258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>727.554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_258_length_127313_cov_412.919818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738609_bin.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>39.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_258_length_127313_cov_412.919818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0370468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539200" accession="ERS11141168">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141168</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539200</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.3558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_60_length_242047_cov_47.859987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738609_bin.136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_60_length_242047_cov_47.859987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539201" accession="ERS11141169">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141169</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539201</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.1251</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1251_length_51615_cov_6.947883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4074074074074074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1251_length_51615_cov_6.947883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738609_virus.1251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539202" accession="ERS11141170">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141170</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539202</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.1424</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17695805183806096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.8127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1424_length_47689_cov_29.865433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1424_length_47689_cov_29.865433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME117056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella sp002299315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745397_virus.290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539203" accession="ERS11141171">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141171</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539203</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.1783</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1783_length_40737_cov_11.520389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_1783_length_40737_cov_11.520389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539204" accession="ERS11141172">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141172</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539204</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.2141</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.8681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_2141_length_35756_cov_46.140643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_2141_length_35756_cov_46.140643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738609_virus.2141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539205" accession="ERS11141173">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141173</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539205</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.2701</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_2701_length_29570_cov_9.205337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_2701_length_29570_cov_9.205337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900542795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539206" accession="ERS11141174">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141174</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539206</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.3084</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.19716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_3084_length_26469_cov_4.989429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_3084_length_26469_cov_4.989429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_virus.1092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539207" accession="ERS11141175">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141175</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539207</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02549472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.7636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_381_length_103597_cov_39.253149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_381_length_103597_cov_39.253149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539208" accession="ERS11141176">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141176</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539208</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.4937</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17270013350281002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_4937_length_17845_cov_45.108397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_4937_length_17845_cov_45.108397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738609_virus.4937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539209" accession="ERS11141177">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141177</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539209</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738609_virus.8239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738609.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>214.697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738609) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560346) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_8239_length_11604_cov_115.228148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_1_1770__NODE_8239_length_11604_cov_115.228148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745970_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900551985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.8863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539210" accession="ERS11141178">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141178</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539210</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_provirus.1189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1189_length_41908_cov_14.764266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1189_length_41908_cov_14.764266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0256792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539211" accession="ERS11141179">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141179</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539211</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_provirus.378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_378_length_76660_cov_19.971286_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_378_length_76660_cov_19.971286_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539212" accession="ERS11141180">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141180</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539212</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_provirus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>116.283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_71_length_143974_cov_66.608484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738610_bin.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_71_length_143974_cov_66.608484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539213" accession="ERS11141181">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141181</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539213</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.1049</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>124.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1049_length_45386_cov_71.013618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1049_length_45386_cov_71.013618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_provirus.178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539214" accession="ERS11141182">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141182</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539214</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.1309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1309_length_39272_cov_10.582549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_1309_length_39272_cov_10.582549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.1309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539215" accession="ERS11141183">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141183</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539215</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.160</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>184.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_160_length_108784_cov_106.859199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_160_length_108784_cov_106.859199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539216" accession="ERS11141184">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141184</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539216</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_219_length_98000_cov_9.306373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_219_length_98000_cov_9.306373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539217" accession="ERS11141185">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141185</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539217</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.3246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16375920696193652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_3246_length_19805_cov_5.886050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_3246_length_19805_cov_5.886050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.3747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539218" accession="ERS11141186">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141186</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539218</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738610_virus.954</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738610.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738610) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561162) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_954_length_47773_cov_11.690624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_5_1598__NODE_954_length_47773_cov_11.690624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738564_virus.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539219" accession="ERS11141187">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141187</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539219</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_provirus.171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.8447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_171_length_166165_cov_28.512530_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738611_bin.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_171_length_166165_cov_28.512530_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.1424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539220" accession="ERS11141188">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141188</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539220</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_provirus.2772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07403688597812165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.6534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2772_length_35597_cov_17.964414_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2772_length_35597_cov_17.964414_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745594_bin.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__UBA5884 sp900551505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_provirus.2772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539221" accession="ERS11141189">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141189</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539221</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_provirus.402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_402_length_110746_cov_7.027885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738611_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_402_length_110746_cov_7.027885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UBA6984;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_provirus.402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539222" accession="ERS11141190">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141190</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539222</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_provirus.736</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_736_length_82516_cov_8.708221_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738611_bin.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_736_length_82516_cov_8.708221_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1781;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_provirus.1138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539223" accession="ERS11141191">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141191</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539223</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.11435</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.76918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_11435_length_11839_cov_3.295188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_11435_length_11839_cov_3.295188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.11435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539224" accession="ERS11141192">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141192</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539224</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.149</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_149_length_178048_cov_11.988285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_149_length_178048_cov_11.988285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539225" accession="ERS11141193">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141193</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539225</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.1899</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.1108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_1899_length_46654_cov_27.044507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_1899_length_46654_cov_27.044507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738170_virus.283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539226" accession="ERS11141194">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141194</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539226</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.2315</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2315_length_40593_cov_6.054250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2315_length_40593_cov_6.054250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.2315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539227" accession="ERS11141195">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141195</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539227</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.2538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2538_length_37949_cov_7.127244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2538_length_37949_cov_7.127244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.2538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539228" accession="ERS11141196">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141196</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539228</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.2672</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2672_length_36530_cov_13.967026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738611_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_2672_length_36530_cov_13.967026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.2672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539229" accession="ERS11141197">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141197</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539229</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.3025</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09759913048709812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.5626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_3025_length_33328_cov_13.352771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_3025_length_33328_cov_13.352771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539230" accession="ERS11141198">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141198</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539230</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.3423</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12951046580442682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_3423_length_30556_cov_12.714262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_3423_length_30556_cov_12.714262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746433_provirus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539231" accession="ERS11141199">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141199</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539231</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.4956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2599118942731277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.49763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_4956_length_23316_cov_4.386161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_4956_length_23316_cov_4.386161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738552_bin.355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__CAG-238;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_virus.4956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539232" accession="ERS11141200">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141200</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539232</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.5565</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.98969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_5565_length_21396_cov_4.645574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_5565_length_21396_cov_4.645574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_provirus.1362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539233" accession="ERS11141201">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141201</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539233</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738611_virus.886</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738611.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.9644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738611) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559915) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_886_length_73671_cov_34.963339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6060606060606061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_9_2483__NODE_886_length_73671_cov_34.963339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539234" accession="ERS11141202">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141202</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539234</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_provirus.158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1751132947985236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_158_length_154014_cov_48.862327_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738612_bin.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_158_length_154014_cov_48.862327_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_provirus.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539235" accession="ERS11141203">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141203</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539235</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_provirus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>154.422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_351_length_108015_cov_88.313967_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738612_bin.111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_351_length_108015_cov_88.313967_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_provirus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539236" accession="ERS11141204">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141204</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539236</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_provirus.704</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_704_length_76080_cov_12.439996_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738612_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_704_length_76080_cov_12.439996_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738231_virus.1408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539237" accession="ERS11141205">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141205</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539237</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.1561</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.039370028578617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.5234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_1561_length_46805_cov_20.426596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_1561_length_46805_cov_20.426596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539238" accession="ERS11141206">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141206</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539238</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.1795</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03978894729646896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_1795_length_42974_cov_22.176143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_1795_length_42974_cov_22.176143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738170_provirus.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539239" accession="ERS11141207">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141207</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539239</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.2233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_2233_length_36418_cov_9.532374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_2233_length_36418_cov_9.532374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539240" accession="ERS11141208">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141208</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539240</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.2573</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16464757709251104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>114.955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_2573_length_32286_cov_68.091434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738612_bin.111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_2573_length_32286_cov_68.091434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_provirus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539241" accession="ERS11141209">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141209</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539241</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.413</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.8186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_413_length_100724_cov_17.525361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.988095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_413_length_100724_cov_17.525361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539242" accession="ERS11141210">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141210</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539242</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738612_virus.8656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738612.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06078698503890495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.0017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738612) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559618) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_8656_length_11212_cov_55.705344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_2_2674__NODE_8656_length_11212_cov_55.705344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME181333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900540885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738231_virus.7124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539243" accession="ERS11141211">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141211</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539243</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_provirus.1853</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:57Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.0324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1853_length_35877_cov_51.285922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1853_length_35877_cov_51.285922_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME117056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella sp002299315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.2092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539244" accession="ERS11141212">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141212</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539244</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_provirus.404</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04171309743186609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:58Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:58Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.3928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_404_length_84376_cov_11.386102_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738613_bin.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_404_length_84376_cov_11.386102_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539245" accession="ERS11141213">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141213</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539245</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.1059</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1059_length_49782_cov_19.250599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1059_length_49782_cov_19.250599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0339185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539246" accession="ERS11141214">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141214</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539246</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.1406</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1406_length_41963_cov_7.810223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1406_length_41963_cov_7.810223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.1406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539247" accession="ERS11141215">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141215</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539247</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.1489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1489_length_40573_cov_20.317834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1489_length_40573_cov_20.317834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.1099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539248" accession="ERS11141216">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141216</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539248</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.1679</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.3465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1679_length_38068_cov_24.812982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1679_length_38068_cov_24.812982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_provirus.383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539249" accession="ERS11141217">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141217</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539249</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.1924</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1924_length_34900_cov_7.582173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738613_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_1924_length_34900_cov_7.582173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738178_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.1924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539250" accession="ERS11141218">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141218</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539250</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.22389</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.56982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_22389_length_5822_cov_3.788860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_22389_length_5822_cov_3.788860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.15575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539251" accession="ERS11141219">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141219</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539251</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.282</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_282_length_100964_cov_5.738678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6511627906976745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_282_length_100964_cov_5.738678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539252" accession="ERS11141220">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141220</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539252</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.3518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13705947136563884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.52771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_3518_length_23174_cov_4.776421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_3518_length_23174_cov_4.776421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.3518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539253" accession="ERS11141221">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141221</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539253</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.4997</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_4997_length_18359_cov_11.436221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_4997_length_18359_cov_11.436221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_C;s__Ruminococcus_C sp000433635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.4997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539254" accession="ERS11141222">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141222</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539254</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.671</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_671_length_64090_cov_14.128068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_671_length_64090_cov_14.128068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539255" accession="ERS11141223">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141223</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539255</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738613_virus.972</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.5117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562068) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_972_length_52725_cov_45.359843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43333333333333335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_9_1735__NODE_972_length_52725_cov_45.359843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_provirus.872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539256" accession="ERS11141224">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141224</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539256</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_provirus.24</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:06Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.6934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_24_length_218357_cov_39.573122_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738614_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.380952380952381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_24_length_218357_cov_39.573122_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_provirus.135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539257" accession="ERS11141225">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141225</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539257</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_provirus.43</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.6926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_43_length_186647_cov_30.588026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5111111111111111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_43_length_186647_cov_30.588026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME103816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__Dorea_A longicatena</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_provirus.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539258" accession="ERS11141226">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141226</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539258</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_provirus.83</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_83_length_144955_cov_10.840611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738614_bin.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_83_length_144955_cov_10.840611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__CAG-274;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738614_provirus.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539259" accession="ERS11141227">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141227</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539259</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.1322</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Teubervirus virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10658093884155775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>197.515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_1322_length_34662_cov_119.973110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_1322_length_34662_cov_119.973110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Teubervirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539260" accession="ERS11141228">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141228</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539260</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.1655</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_1655_length_29996_cov_6.185668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_1655_length_29996_cov_6.185668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Schaedlerella;s__Schaedlerella sp900066545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738614_virus.1655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539261" accession="ERS11141229">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141229</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539261</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.2447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_2447_length_23322_cov_8.131297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_2447_length_23322_cov_8.131297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME067489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-217;s__CAG-217 sp000436335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_provirus.439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539262" accession="ERS11141230">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141230</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539262</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.2927</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15709857153129908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_2927_length_20701_cov_14.791020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_2927_length_20701_cov_14.791020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter;s__Fusicatenibacter saccharivorans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539263" accession="ERS11141231">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141231</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539263</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.5705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>192.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_5705_length_13251_cov_115.477987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_5705_length_13251_cov_115.477987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.4087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539264" accession="ERS11141232">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141232</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539264</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738614_virus.863</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567908) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_863_length_44738_cov_22.848794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_I_10_1184__NODE_863_length_44738_cov_22.848794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539265" accession="ERS11141233">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141233</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539265</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_provirus.1611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1611_length_38550_cov_15.383360_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738616_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1611_length_38550_cov_15.383360_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_provirus.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539266" accession="ERS11141234">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141234</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539266</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_provirus.56</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_56_length_204393_cov_9.319941_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738616_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_56_length_204393_cov_9.319941_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539267" accession="ERS11141235">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141235</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539267</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_provirus.78</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_78_length_182516_cov_9.396242_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738616_bin.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_78_length_182516_cov_9.396242_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_provirus.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539268" accession="ERS11141236">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141236</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539268</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.1222</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1222_length_46503_cov_9.944428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1222_length_46503_cov_9.944428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_virus.1222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539269" accession="ERS11141237">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141237</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539269</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.1440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.50914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1440_length_41867_cov_5.692845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738616_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1440_length_41867_cov_5.692845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746219_bin.253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UBA2883;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_virus.1440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539270" accession="ERS11141238">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141238</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539270</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.1769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>192.214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1769_length_36173_cov_146.703457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_1769_length_36173_cov_146.703457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_virus.1769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539271" accession="ERS11141239">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141239</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539271</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.2446</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.3738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_2446_length_28480_cov_33.704503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_2446_length_28480_cov_33.704503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.2329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539272" accession="ERS11141240">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141240</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539272</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.1888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_319_length_99633_cov_47.540078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_319_length_99633_cov_47.540078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539273" accession="ERS11141241">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141241</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539273</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738616_virus.521</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738616.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.2818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738616) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559925) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_521_length_78064_cov_32.425122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_13_2496__NODE_521_length_78064_cov_32.425122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_virus.521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539274" accession="ERS11141242">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141242</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539274</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_provirus.111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>276.704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_111_length_148414_cov_169.333295_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738617_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_111_length_148414_cov_169.333295_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738154_provirus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539275" accession="ERS11141243">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141243</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539275</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_22_length_288334_cov_22.206569_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738617_bin.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_22_length_288334_cov_22.206569_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_provirus.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539276" accession="ERS11141244">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141244</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539276</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_provirus.449</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_449_length_74767_cov_7.660021_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738617_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5757575757575758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_449_length_74767_cov_7.660021_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME007797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__CAG-115 sp003531585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_provirus.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539277" accession="ERS11141245">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141245</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539277</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_provirus.93</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_93_length_163345_cov_5.958694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738617_bin.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6037735849056604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_93_length_163345_cov_5.958694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_provirus.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539278" accession="ERS11141246">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141246</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539278</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_virus.1687</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_1687_length_33539_cov_8.343763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_1687_length_33539_cov_8.343763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539279" accession="ERS11141247">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141247</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539279</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_virus.286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_286_length_96398_cov_28.481037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.380952380952381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_286_length_96398_cov_28.481037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_virus.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539280" accession="ERS11141248">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141248</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539280</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738617_virus.668</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738617.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.1083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738617) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559313) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_668_length_61513_cov_18.649473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_2_2602__NODE_668_length_61513_cov_18.649473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539281" accession="ERS11141249">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141249</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539281</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_provirus.236</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>237.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_236_length_104010_cov_136.797966_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_236_length_104010_cov_136.797966_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_provirus.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539282" accession="ERS11141250">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141250</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539282</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_provirus.536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_536_length_65469_cov_11.089919_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8787878787878788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_536_length_65469_cov_11.089919_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_C</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539283" accession="ERS11141251">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141251</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539283</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_provirus.921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13104347289138735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_921_length_44475_cov_10.866683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738619_bin.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_921_length_44475_cov_10.866683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_provirus.441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539284" accession="ERS11141252">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141252</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539284</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_virus.1291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.74907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_1291_length_34119_cov_4.124581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_1291_length_34119_cov_4.124581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539285" accession="ERS11141253">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141253</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539285</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_virus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.4495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_53_length_206038_cov_14.223518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_53_length_206038_cov_14.223518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738619_virus.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539286" accession="ERS11141254">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141254</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539286</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738619_virus.983</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738619.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.1877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738619) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568531) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_983_length_42223_cov_15.799862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738619_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6206896551724138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_3_THA1067YZ_1__NODE_983_length_42223_cov_15.799862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Mitsuokella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_virus.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539287" accession="ERS11141255">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141255</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539287</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_provirus.1703</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1703_length_47191_cov_7.655432_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.972972972972973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1703_length_47191_cov_7.655432_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738535_provirus.895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539288" accession="ERS11141256">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141256</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539288</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_provirus.299</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>189.397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_299_length_137573_cov_105.404295_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_299_length_137573_cov_105.404295_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738620_provirus.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539289" accession="ERS11141257">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141257</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539289</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_provirus.543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_543_length_99106_cov_7.856204_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738620_bin.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_543_length_99106_cov_7.856204_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;s__TF01-11 sp001414325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738620_provirus.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539290" accession="ERS11141258">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141258</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539290</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_provirus.84</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_84_length_247936_cov_11.120419_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_84_length_247936_cov_11.120419_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME074354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Negativibacillus;s__Negativibacillus sp000435195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738620_provirus.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539291" accession="ERS11141259">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141259</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539291</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.1384</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.2184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1384_length_55100_cov_55.109863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6585365853658537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1384_length_55100_cov_55.109863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539292" accession="ERS11141260">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141260</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539292</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.1629</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.1007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1629_length_48825_cov_22.155247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_1629_length_48825_cov_22.155247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0313669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539293" accession="ERS11141261">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141261</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539293</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.2052</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1520.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2052_length_40534_cov_850.182811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2052_length_40534_cov_850.182811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539294" accession="ERS11141262">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141262</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539294</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.2434</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.2206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2434_length_35118_cov_17.983391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2434_length_35118_cov_17.983391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539295" accession="ERS11141263">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141263</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539295</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.2924</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.0653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2924_length_29834_cov_15.558457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_2924_length_29834_cov_15.558457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0305583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539296" accession="ERS11141264">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141264</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539296</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.3547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:14Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_3547_length_24667_cov_58.178528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_3547_length_24667_cov_58.178528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738549_virus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539297" accession="ERS11141265">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141265</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539297</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738620_virus.556</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738620.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19988986784140972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1234.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738620) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_556_length_97955_cov_672.984981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_C_24_1038__NODE_556_length_97955_cov_672.984981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0378020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539298" accession="ERS11141266">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141266</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539298</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_provirus.230</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_230_length_128898_cov_7.351790_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738621_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6904761904761905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_230_length_128898_cov_7.351790_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0360932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539299" accession="ERS11141267">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141267</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539299</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_provirus.585</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.1698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_585_length_73191_cov_40.650573_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738621_bin.200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_585_length_73191_cov_40.650573_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_provirus.585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539300" accession="ERS11141268">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141268</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539300</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_virus.1295</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1295_length_43320_cov_15.811761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1295_length_43320_cov_15.811761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.1295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539301" accession="ERS11141269">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141269</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539301</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_virus.1577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1577_length_37354_cov_8.329238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1577_length_37354_cov_8.329238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539302" accession="ERS11141270">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141270</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539302</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_virus.1780</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1780_length_34052_cov_16.833966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_1780_length_34052_cov_16.833966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME015136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;s__Oribacterium sp004554245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.1780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539303" accession="ERS11141271">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141271</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539303</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_virus.2352</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Klebsiella phage vB_Kpn_Chronis virus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.97818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_2352_length_26910_cov_5.736034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_2352_length_26910_cov_5.736034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738619_provirus.722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Klebsiella phage vB_Kpn_Chronis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539304" accession="ERS11141272">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141272</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539304</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738621_virus.669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567230) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_669_length_66994_cov_19.289104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_17_RAJ0019YZ__NODE_669_length_66994_cov_19.289104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0344814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539305" accession="ERS11141273">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141273</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539305</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_provirus.1080</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1080_length_66357_cov_13.600347_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738622_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.90625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1080_length_66357_cov_13.600347_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.2214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539306" accession="ERS11141274">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141274</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539306</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_provirus.1555</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1555_length_51925_cov_8.215939_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738622_bin.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1555_length_51925_cov_8.215939_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.1328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539307" accession="ERS11141275">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141275</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539307</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_provirus.2675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.6343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2675_length_35947_cov_33.724589_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738622_bin.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2675_length_35947_cov_33.724589_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_provirus.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539308" accession="ERS11141276">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141276</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539308</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_provirus.531</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16636888074277426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>673.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_531_length_100010_cov_430.630983_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_531_length_100010_cov_430.630983_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539309" accession="ERS11141277">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141277</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539309</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.107</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.7818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_107_length_203772_cov_17.602474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6515151515151515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_107_length_203772_cov_17.602474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales;f__Monoglobaceae;g__UMGS1820;s__UMGS1820 sp900545865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539310" accession="ERS11141278">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141278</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539310</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.1289</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.8313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1289_length_58946_cov_18.125720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1289_length_58946_cov_18.125720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539311" accession="ERS11141279">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141279</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539311</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.1452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.8618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1452_length_54205_cov_40.573215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7058823529411765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1452_length_54205_cov_40.573215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539312" accession="ERS11141280">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141280</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539312</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.1630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.5825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1630_length_50489_cov_11.244723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_1630_length_50489_cov_11.244723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745419_virus.909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539313" accession="ERS11141281">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141281</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539313</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3974118942731278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>104.656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_181_length_170056_cov_59.059454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5370370370370371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_181_length_170056_cov_59.059454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539314" accession="ERS11141282">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141282</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539314</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0758246448305204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_197_length_161909_cov_8.488482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_197_length_161909_cov_8.488482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539315" accession="ERS11141283">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141283</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539315</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.2096</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.0921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2096_length_42386_cov_70.276915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2096_length_42386_cov_70.276915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME172369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-917;g__CAG-349;s__CAG-349 sp003539515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539316" accession="ERS11141284">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141284</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539316</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.2331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.7921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2331_length_39488_cov_12.559742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2331_length_39488_cov_12.559742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.2331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539317" accession="ERS11141285">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141285</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539317</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.2522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0292130974318661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2522_length_37323_cov_6.286098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2522_length_37323_cov_6.286098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539318" accession="ERS11141286">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141286</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539318</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.2674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.0666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2674_length_35949_cov_45.513242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_2674_length_35949_cov_45.513242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0281755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539319" accession="ERS11141287">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141287</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539319</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.2808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_286_length_137918_cov_35.654841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_286_length_137918_cov_35.654841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539320" accession="ERS11141288">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141288</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539320</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.326</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_326_length_131687_cov_41.669455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5692307692307692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_326_length_131687_cov_41.669455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738227_provirus.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539321" accession="ERS11141289">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141289</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539321</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.3654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_3654_length_28589_cov_10.222257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_3654_length_28589_cov_10.222257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.3335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539322" accession="ERS11141290">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141290</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539322</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.4321</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:31:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.1302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_4321_length_25038_cov_47.794840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_4321_length_25038_cov_47.794840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539323" accession="ERS11141291">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141291</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539323</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.4967</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11754819338193057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_4967_length_22309_cov_8.896815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738622_bin.230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_4967_length_22309_cov_8.896815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__CAG-115 sp000432175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.4967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539324" accession="ERS11141292">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141292</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539324</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.561</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2445250585754259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.5645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_561_length_96509_cov_20.716142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.971830985915493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_561_length_96509_cov_20.716142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539325" accession="ERS11141293">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141293</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539325</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.7390</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_7390_length_16180_cov_10.428988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_7390_length_16180_cov_10.428988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_virus.7390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539326" accession="ERS11141294">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141294</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539326</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738622_virus.926</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738622.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07634348183251045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.16592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738622) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561693) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_926_length_72462_cov_4.402722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_8_1202__NODE_926_length_72462_cov_4.402722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539327" accession="ERS11141295">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141295</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539327</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_provirus.1328</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05236784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_1328_length_64368_cov_14.732684_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_1328_length_64368_cov_14.732684_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.1328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539328" accession="ERS11141296">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141296</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539328</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_provirus.19</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>177.474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_19_length_407728_cov_130.914125_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738623_bin.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_19_length_407728_cov_130.914125_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539329" accession="ERS11141297">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141297</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539329</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_provirus.2941</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03469162995594714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.4774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2941_length_38609_cov_13.940828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2941_length_38609_cov_13.940828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.2135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539330" accession="ERS11141298">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141298</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539330</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_provirus.5405</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_5405_length_23888_cov_7.682290_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_5405_length_23888_cov_7.682290_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME280504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900555735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_virus.927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539331" accession="ERS11141299">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141299</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539331</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_provirus.960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.06782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_960_length_78533_cov_4.805968_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_960_length_78533_cov_4.805968_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539332" accession="ERS11141300">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141300</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539332</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.1453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.6437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_1453_length_61277_cov_16.002958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_1453_length_61277_cov_16.002958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.1453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539333" accession="ERS11141301">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141301</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539333</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.2006</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.6677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2006_length_50140_cov_31.044324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2006_length_50140_cov_31.044324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539334" accession="ERS11141302">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141302</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539334</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.2271</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>98.6439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2271_length_46600_cov_54.400662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2271_length_46600_cov_54.400662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539335" accession="ERS11141303">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141303</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539335</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.2443</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Kagunavirus virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.0282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2443_length_44423_cov_47.904839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.647887323943662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2443_length_44423_cov_47.904839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Guernseyvirinae; Kagunavirus; unclassified Kagunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539336" accession="ERS11141304">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141304</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539336</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.2601</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.7556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2601_length_42352_cov_31.433235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6129032258064516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2601_length_42352_cov_31.433235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539337" accession="ERS11141305">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141305</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539337</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.2754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2754_length_40490_cov_9.588004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_2754_length_40490_cov_9.588004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539338" accession="ERS11141306">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141306</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539338</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.3027</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1380538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.9364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3027_length_37764_cov_22.492106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3027_length_37764_cov_22.492106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738249_virus.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539339" accession="ERS11141307">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141307</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539339</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.3123</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3123_length_36931_cov_13.318527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3123_length_36931_cov_13.318527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-95;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_virus.1199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539340" accession="ERS11141308">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141308</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539340</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.3330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3330_length_35302_cov_12.905323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3330_length_35302_cov_12.905323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.3330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539341" accession="ERS11141309">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141309</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539341</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.3607</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3607_length_33040_cov_6.476049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_3607_length_33040_cov_6.476049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.3607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539342" accession="ERS11141310">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141310</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539342</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.4328</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.8844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_4328_length_28540_cov_14.720655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_4328_length_28540_cov_14.720655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.4328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539343" accession="ERS11141311">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141311</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539343</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>299.354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_545_length_109168_cov_181.391911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_545_length_109168_cov_181.391911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_virus.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539344" accession="ERS11141312">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141312</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539344</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.6621</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_6621_length_20025_cov_14.348606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_6621_length_20025_cov_14.348606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.6964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539345" accession="ERS11141313">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141313</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539345</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738623_virus.9006</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738623.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0490368859781216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>120.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738623) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560388) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_9006_length_15431_cov_65.908628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_13_1776__NODE_9006_length_15431_cov_65.908628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.9006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539346" accession="ERS11141314">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141314</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539346</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_provirus.1360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.939977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.7798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1360_length_69077_cov_42.582609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3636363636363637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1360_length_69077_cov_42.582609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746349_bin.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Sphaerochaetales;f__Sphaerochaetaceae;g__UBA5920;s__UBA5920 sp002406055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_provirus.1360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539347" accession="ERS11141315">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141315</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539347</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_provirus.2071</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7243672824538925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.0368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2071_length_51432_cov_44.450609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2071_length_51432_cov_44.450609_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_virus.1910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539348" accession="ERS11141316">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141316</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539348</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_provirus.3315</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3315_length_37000_cov_6.339355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3315_length_37000_cov_6.339355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539349" accession="ERS11141317">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141317</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539349</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_provirus.82</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_82_length_307987_cov_14.584418_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738624_bin.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_82_length_307987_cov_14.584418_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME087442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger sp900540775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_provirus.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539350" accession="ERS11141318">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141318</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539350</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.1121</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1121_length_77827_cov_10.263434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1121_length_77827_cov_10.263434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.1121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539351" accession="ERS11141319">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141319</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539351</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.1604</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.2592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1604_length_62260_cov_26.977084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1604_length_62260_cov_26.977084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539352" accession="ERS11141320">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141320</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539352</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.17409</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14693597737148006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.13522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_17409_length_8995_cov_5.005831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_17409_length_8995_cov_5.005831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.6136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539353" accession="ERS11141321">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141321</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539353</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.1936</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06497797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>145.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1936_length_53914_cov_79.535877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_1936_length_53914_cov_79.535877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.1936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539354" accession="ERS11141322">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141322</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539354</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.22395</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03576461450908247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.27637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_22395_length_7061_cov_3.252005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_22395_length_7061_cov_3.252005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738664_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.22395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539355" accession="ERS11141323">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141323</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539355</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.2414</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2414_length_46217_cov_6.203229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2414_length_46217_cov_6.203229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0354261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539356" accession="ERS11141324">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141324</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539356</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.2662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2662_length_43050_cov_14.888767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2662_length_43050_cov_14.888767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.2662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539357" accession="ERS11141325">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141325</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539357</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.2790</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>90.3341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2790_length_41636_cov_49.421449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2790_length_41636_cov_49.421449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.2790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539358" accession="ERS11141326">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141326</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539358</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.2888</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2123898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>173.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2888_length_40667_cov_96.042868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_2888_length_40667_cov_96.042868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539359" accession="ERS11141327">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141327</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539359</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.3043</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3043_length_39255_cov_6.667492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3043_length_39255_cov_6.667492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.3043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539360" accession="ERS11141328">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141328</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539360</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.3414</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:16Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.73335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3414_length_36441_cov_5.270047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738624_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3414_length_36441_cov_5.270047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_provirus.1694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539361" accession="ERS11141329">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141329</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539361</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.3596</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.2575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3596_length_35131_cov_57.202630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_3596_length_35131_cov_57.202630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.3596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539362" accession="ERS11141330">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141330</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539362</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.4017</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>96.3481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_4017_length_32463_cov_101.704718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_4017_length_32463_cov_101.704718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.4017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539363" accession="ERS11141331">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141331</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539363</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.4353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.8785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_4353_length_30404_cov_30.082105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738624_bin.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_4353_length_30404_cov_30.082105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__UBA1234;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.4353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539364" accession="ERS11141332">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141332</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539364</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.524</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.5677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_524_length_120294_cov_16.247103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6779661016949152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_524_length_120294_cov_16.247103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746738_bin.136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900554275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539365" accession="ERS11141333">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141333</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539365</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.6249</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>229.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_6249_length_22533_cov_125.240871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_6249_length_22533_cov_125.240871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.6249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539366" accession="ERS11141334">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141334</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539366</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.692</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_692_length_101877_cov_8.391022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8409090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_692_length_101877_cov_8.391022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539367" accession="ERS11141335">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141335</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539367</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.8489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13923512386358416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_8489_length_17364_cov_6.924625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_8489_length_17364_cov_6.924625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539368" accession="ERS11141336">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141336</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539368</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738624_virus.9966</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738624.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738624) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561717) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_9966_length_15123_cov_5.929483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_F_16_1210__NODE_9966_length_15123_cov_5.929483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME129025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp900763685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.9966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539369" accession="ERS11141337">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141337</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539369</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_provirus.143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>174.501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_143_length_187871_cov_98.686545_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738626_bin.497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_143_length_187871_cov_98.686545_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-533;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_provirus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539370" accession="ERS11141338">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141338</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539370</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_provirus.2664</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.0927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2664_length_44145_cov_19.909345_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2664_length_44145_cov_19.909345_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_provirus.2664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539371" accession="ERS11141339">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141339</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539371</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_provirus.525</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.6164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_525_length_116744_cov_39.395356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_525_length_116744_cov_39.395356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Ligilactobacillus;s__Ligilactobacillus ruminis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_provirus.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539372" accession="ERS11141340">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141340</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539372</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_provirus.765</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_765_length_96270_cov_9.603984_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_765_length_96270_cov_9.603984_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Megasphaeraceae;g__Megasphaera;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_provirus.765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539373" accession="ERS11141341">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141341</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539373</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:11Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_102_length_211674_cov_30.849743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.676056338028169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_102_length_211674_cov_30.849743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539374" accession="ERS11141342">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141342</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539374</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.1110</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>215.298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1110_length_77006_cov_126.939711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6486486486486487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1110_length_77006_cov_126.939711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.1110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539375" accession="ERS11141343">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141343</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539375</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.1342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>150.764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1342_length_68258_cov_85.979569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1342_length_68258_cov_85.979569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_provirus.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539376" accession="ERS11141344">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141344</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539376</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.1574</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1574_length_61762_cov_8.956294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9594594594594594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1574_length_61762_cov_8.956294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.1574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539377" accession="ERS11141345">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141345</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539377</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.1790</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.7565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1790_length_57244_cov_30.389333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_1790_length_57244_cov_30.389333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.1790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539378" accession="ERS11141346">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141346</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539378</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.2055</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.8579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2055_length_52449_cov_22.220404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2055_length_52449_cov_22.220404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides sp900549585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0328206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539379" accession="ERS11141347">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141347</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539379</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_217_length_162704_cov_11.091166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.711864406779661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_217_length_162704_cov_11.091166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME007974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp003516765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539380" accession="ERS11141348">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141348</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539380</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.2415</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2415_length_47166_cov_27.405211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2415_length_47166_cov_27.405211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.2415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539381" accession="ERS11141349">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141349</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539381</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.2704</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2704_length_43563_cov_11.869958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_2704_length_43563_cov_11.869958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.2704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539382" accession="ERS11141350">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141350</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539382</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.282</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_282_length_148810_cov_10.559761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6101694915254238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_282_length_148810_cov_10.559761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFTH01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539383" accession="ERS11141351">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141351</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539383</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3011</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.9283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3011_length_40155_cov_13.094316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3011_length_40155_cov_13.094316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.1078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539384" accession="ERS11141352">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141352</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539384</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3219_length_38352_cov_7.504376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738626_bin.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3219_length_38352_cov_7.504376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.3219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539385" accession="ERS11141353">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141353</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539385</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.14563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3319_length_37497_cov_4.130786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3319_length_37497_cov_4.130786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738264_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__UBA4644;s__UBA4644 sp900763355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.3319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539386" accession="ERS11141354">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141354</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539386</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3597</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>104.277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3597_length_35552_cov_62.133559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42105263157894735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3597_length_35552_cov_62.133559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.3597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539387" accession="ERS11141355">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141355</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539387</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3880</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.6969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3880_length_33552_cov_48.034503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3880_length_33552_cov_48.034503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.3880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539388" accession="ERS11141356">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141356</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539388</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.3964</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04616727037876595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.5699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3964_length_33012_cov_53.950235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_3964_length_33012_cov_53.950235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.3964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539389" accession="ERS11141357">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141357</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539389</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.4360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.1568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_4360_length_30740_cov_22.127907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_4360_length_30740_cov_22.127907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_provirus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539390" accession="ERS11141358">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141358</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539390</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.5141</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5141_length_26778_cov_9.826561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5141_length_26778_cov_9.826561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.5141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539391" accession="ERS11141359">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141359</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539391</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.5530</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17820990407225873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>842.724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5530_length_25352_cov_517.558932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5530_length_25352_cov_517.558932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.5530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539392" accession="ERS11141360">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141360</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539392</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.5924</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5924_length_24056_cov_10.774344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_5924_length_24056_cov_10.774344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.1659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539393" accession="ERS11141361">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141361</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539393</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_674_length_103208_cov_9.390940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3703703703703703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_674_length_103208_cov_9.390940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0368347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539394" accession="ERS11141362">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141362</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539394</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_71_length_231910_cov_9.018556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_71_length_231910_cov_9.018556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira eligens_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539396" accession="ERS11141364">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141364</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539396</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.833</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Acinetobacter phage MD-2021a virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09666904456843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.4229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_833_length_91666_cov_5.328850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_833_length_91666_cov_5.328850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738582_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Lentisphaeria;o__Victivallales;f__UBA1829;g__UBA1829;s__UBA1829 sp900549415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0378462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Acinetobacter phage MD-2021a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539397" accession="ERS11141365">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141365</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539397</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_provirus.1066</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_1066_length_53616_cov_6.996227_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738627_bin.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_1066_length_53616_cov_6.996227_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.2506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539398" accession="ERS11141366">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141366</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539398</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_provirus.18</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.8294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_18_length_314921_cov_20.325888_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738627_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_18_length_314921_cov_20.325888_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME103816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__Dorea_A longicatena</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539399" accession="ERS11141367">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141367</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539399</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_provirus.287</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.98993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_287_length_112226_cov_5.428332_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738627_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_287_length_112226_cov_5.428332_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides uniformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0297872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539400" accession="ERS11141368">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141368</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539400</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_provirus.787</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6694113353173287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>433.021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_787_length_64860_cov_243.055292_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_787_length_64860_cov_243.055292_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539401" accession="ERS11141369">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141369</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539401</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.122</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_122_length_165359_cov_59.554265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4358974358974359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_122_length_165359_cov_59.554265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0377424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539402" accession="ERS11141370">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141370</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539402</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>103.532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_161_length_145778_cov_56.659172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6290322580645161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_161_length_145778_cov_56.659172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539403" accession="ERS11141371">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141371</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539403</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.1934</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_1934_length_35085_cov_6.597892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6551724137931034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_1934_length_35085_cov_6.597892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539404" accession="ERS11141372">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141372</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539404</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11854347289138734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_211_length_129774_cov_8.142178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5675675675675675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_211_length_129774_cov_8.142178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539405" accession="ERS11141373">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141373</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539405</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.3336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Erysipelothrix phage phi1605 virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>232.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_3336_length_21738_cov_130.460228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_3336_length_21738_cov_130.460228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737969_virus.1469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Erysipelothrix phage phi1605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539406" accession="ERS11141374">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141374</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539406</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.3998</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08491189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_3998_length_18291_cov_9.886516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_3998_length_18291_cov_9.886516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_virus.3998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539407" accession="ERS11141375">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141375</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539407</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738627_virus.775</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738627.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7773678414096915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.5911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738627) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561880) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_775_length_65283_cov_19.846026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_6_1150__NODE_775_length_65283_cov_19.846026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539408" accession="ERS11141376">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141376</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539408</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.10</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.5674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_10_length_430606_cov_20.313324_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.603448275862069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_10_length_430606_cov_20.313324_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539409" accession="ERS11141377">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141377</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539409</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.125</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_125_length_197494_cov_7.145874_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738628_bin.251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_125_length_197494_cov_7.145874_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__CAG-196;s__CAG-196 sp900553895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738174_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539410" accession="ERS11141378">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141378</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539410</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.1557</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1557_length_52925_cov_6.912561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738628_bin.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1557_length_52925_cov_6.912561_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS902;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.1557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539411" accession="ERS11141379">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141379</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539411</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.1991</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1991_length_45489_cov_11.016758_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738628_bin.459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1991_length_45489_cov_11.016758_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539412" accession="ERS11141380">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141380</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539412</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.2577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>106.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2577_length_38727_cov_61.143984_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2577_length_38727_cov_61.143984_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.2577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539413" accession="ERS11141381">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141381</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539413</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.3489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1574339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.8744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3489_length_32501_cov_29.901709_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3489_length_32501_cov_29.901709_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.3489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539414" accession="ERS11141382">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141382</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539414</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.634</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20743392070484584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_634_length_86014_cov_5.625784_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_634_length_86014_cov_5.625784_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539415" accession="ERS11141383">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141383</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539415</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_provirus.956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>291.826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_956_length_70063_cov_172.460978_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_956_length_70063_cov_172.460978_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539416" accession="ERS11141384">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141384</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539416</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.11190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.9569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_11190_length_14529_cov_22.237130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_11190_length_14529_cov_22.237130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A sp900066205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.8840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539417" accession="ERS11141385">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141385</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539417</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.1398</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1398_length_56395_cov_10.232608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_1398_length_56395_cov_10.232608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.1369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539419" accession="ERS11141386">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141386</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539419</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.18628</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.94501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_18628_length_9789_cov_4.471890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_18628_length_9789_cov_4.471890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.11552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539420" accession="ERS11141387">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141387</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539420</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.2247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2247_length_42103_cov_7.861134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2247_length_42103_cov_7.861134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539421" accession="ERS11141388">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141388</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539421</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.2444</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2444_length_40069_cov_6.731396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2444_length_40069_cov_6.731396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539422" accession="ERS11141389">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141389</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539422</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.2829</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2829_length_36723_cov_6.448071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_2829_length_36723_cov_6.448071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539423" accession="ERS11141390">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141390</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539423</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.3125</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3125_length_34673_cov_9.159932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9310344827586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3125_length_34673_cov_9.159932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539424" accession="ERS11141391">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141391</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539424</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.3382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3382_length_33089_cov_10.512935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3382_length_33089_cov_10.512935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio;s__Desulfovibrio piger</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.1884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539425" accession="ERS11141392">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141392</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539425</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.3981</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3981_length_29803_cov_17.359012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_3981_length_29803_cov_17.359012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.3981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539426" accession="ERS11141393">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141393</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539426</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.4470</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.1648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_4470_length_27745_cov_14.512289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_4470_length_27745_cov_14.512289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.4103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539427" accession="ERS11141394">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141394</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539427</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.5769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02673512386358416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.8483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_5769_length_23200_cov_57.775072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_5769_length_23200_cov_57.775072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539428" accession="ERS11141395">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141395</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539428</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.6656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05927917672702029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_6656_length_20994_cov_7.065162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_6656_length_20994_cov_7.065162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745854_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.4620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539429" accession="ERS11141396">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141396</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539429</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738628_virus.8397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738628.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738628) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567910) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_8397_length_17902_cov_9.833885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_14_1640__NODE_8397_length_17902_cov_9.833885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME017973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.8397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539430" accession="ERS11141397">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141397</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539430</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_provirus.147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.7627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_147_length_136264_cov_27.971121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738629_bin.140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_147_length_136264_cov_27.971121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_provirus.800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539431" accession="ERS11141398">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141398</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539431</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_provirus.343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.27241189427312773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_343_length_92715_cov_8.955170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_343_length_92715_cov_8.955170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_provirus.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539432" accession="ERS11141399">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141399</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539432</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_provirus.542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.7565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_542_length_72053_cov_17.514157_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9107142857142856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_542_length_72053_cov_17.514157_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738256_virus.400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539433" accession="ERS11141400">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141400</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539433</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_provirus.877</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>335.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_877_length_55204_cov_225.212527_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_877_length_55204_cov_225.212527_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539434" accession="ERS11141401">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141401</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539434</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.1222</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>165.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_1222_length_44361_cov_97.015559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_1222_length_44361_cov_97.015559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539435" accession="ERS11141402">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141402</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539435</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.1616</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09245594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_1616_length_36913_cov_6.978255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.935483870967742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_1616_length_36913_cov_6.978255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539436" accession="ERS11141403">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141403</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539436</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.2236</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_2236_length_29532_cov_5.709217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_2236_length_29532_cov_5.709217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.2236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539437" accession="ERS11141404">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141404</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539437</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.2693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:25Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_2693_length_25529_cov_5.664545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_2693_length_25529_cov_5.664545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_virus.2693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539438" accession="ERS11141405">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141405</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539438</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.3241</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1940058974551755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>395.893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_3241_length_22147_cov_346.848890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_3241_length_22147_cov_346.848890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_provirus.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539439" accession="ERS11141406">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141406</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539439</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.661</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.7677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_661_length_64588_cov_17.269504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_661_length_64588_cov_17.269504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539440" accession="ERS11141407">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141407</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539440</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.843</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_843_length_56559_cov_6.415902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9423076923076924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_843_length_56559_cov_6.415902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0338867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539441" accession="ERS11141408">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141408</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539441</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738629_virus.94</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738629.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738629) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561301) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_94_length_159386_cov_13.265798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7377049180327869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_N_17_1305__NODE_94_length_159386_cov_13.265798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_virus.100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539442" accession="ERS11141409">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141409</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539442</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_provirus.1469</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.5067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1469_length_60587_cov_15.630689_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1469_length_60587_cov_15.630689_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539443" accession="ERS11141410">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141410</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539443</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_provirus.208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_208_length_192523_cov_14.741257_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738630_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_208_length_192523_cov_14.741257_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__ML615J-28;f__CAG-313;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_provirus.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539444" accession="ERS11141411">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141411</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539444</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_provirus.383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.2159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_383_length_138006_cov_29.148982_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738630_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_383_length_138006_cov_29.148982_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-452;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_provirus.383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539445" accession="ERS11141412">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141412</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539445</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_provirus.888</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>209.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_888_length_84185_cov_118.308853_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738630_bin.226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_888_length_84185_cov_118.308853_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_provirus.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539446" accession="ERS11141413">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141413</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539446</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.1328</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.3585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1328_length_64808_cov_25.458714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9318181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1328_length_64808_cov_25.458714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.1328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539447" accession="ERS11141414">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141414</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539447</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.1552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07804291393558824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.3413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1552_length_58037_cov_35.339545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1552_length_58037_cov_35.339545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.1552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539448" accession="ERS11141415">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141415</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539448</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.1871</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.7506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1871_length_50711_cov_43.484141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8032786885245902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_1871_length_50711_cov_43.484141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539449" accession="ERS11141416">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141416</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539449</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.2205</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2205_length_44977_cov_5.840935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2205_length_44977_cov_5.840935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__SFEL01;s__SFEL01 sp004557245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_provirus.614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539450" accession="ERS11141417">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141417</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539450</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.2305</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.2111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2305_length_43708_cov_28.568334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2305_length_43708_cov_28.568334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738242_virus.549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539451" accession="ERS11141418">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141418</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539451</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.2411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06384348183251046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>411.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2411_length_42214_cov_226.859316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2411_length_42214_cov_226.859316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745890_bin.206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.2411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539452" accession="ERS11141419">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141419</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539452</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.25696</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Chlamydiamicrovirus virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.64988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_25696_length_5334_cov_4.545939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_25696_length_5334_cov_4.545939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738513_bin.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__Zag1;s__Zag1 sp001765415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.25696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; Chlamydiamicrovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539453" accession="ERS11141420">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141420</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539453</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.2725</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2725_length_38483_cov_5.465474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2725_length_38483_cov_5.465474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.2601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539454" accession="ERS11141421">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141421</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539454</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.2963</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.7514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2963_length_36167_cov_55.489609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_2963_length_36167_cov_55.489609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.2963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539455" accession="ERS11141422">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141422</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539455</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.3264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.2136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_3264_length_33299_cov_49.897538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_3264_length_33299_cov_49.897538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.3264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539456" accession="ERS11141423">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141423</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539456</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.3840</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.06166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_3840_length_29149_cov_5.175117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_3840_length_29149_cov_5.175117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539457" accession="ERS11141424">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141424</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539457</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.44605</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3.92001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_44605_length_3238_cov_1.879152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_44605_length_3238_cov_1.879152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.44605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539458" accession="ERS11141425">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141425</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539458</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.5047</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_5047_length_23002_cov_19.472715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_5047_length_23002_cov_19.472715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738161_provirus.435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539459" accession="ERS11141426">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141426</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539459</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738630_virus.710</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr130_1 virus assembled from ERR7738630.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>270.474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738630) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_710_length_97650_cov_147.080606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5490196078431373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_D_12_1182__NODE_710_length_97650_cov_147.080606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr130_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539460" accession="ERS11141427">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141427</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539460</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_provirus.1086</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.4246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1086_length_59406_cov_23.582531_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1086_length_59406_cov_23.582531_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539461" accession="ERS11141428">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141428</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539461</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_provirus.220</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.23799472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:27Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_220_length_144485_cov_10.227958_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738631_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_220_length_144485_cov_10.227958_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__CAG-115 sp000432175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_provirus.220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539462" accession="ERS11141429">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141429</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539462</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_provirus.384</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.5177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_384_length_112502_cov_35.963353_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738631_bin.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9318181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_384_length_112502_cov_35.963353_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0367225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539463" accession="ERS11141430">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141430</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539463</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_provirus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>91.488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_685_length_81055_cov_61.143261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738631_bin.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_685_length_81055_cov_61.143261_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745403_provirus.913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539464" accession="ERS11141431">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141431</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539464</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.1101</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.3777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1101_length_58730_cov_58.095051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9629629629629628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1101_length_58730_cov_58.095051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME261446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003538135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539465" accession="ERS11141432">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141432</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539465</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.1431</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.5869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1431_length_48487_cov_10.124726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9736842105263158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1431_length_48487_cov_10.124726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539466" accession="ERS11141433">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141433</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539466</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.1646</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>207.981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1646_length_43573_cov_127.678798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1646_length_43573_cov_127.678798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738242_virus.549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539467" accession="ERS11141434">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141434</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539467</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.1824</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.8827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1824_length_40335_cov_35.537831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_1824_length_40335_cov_35.537831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738241_virus.1176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539468" accession="ERS11141435">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141435</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539468</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.2013</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_2013_length_37589_cov_14.315632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738631_bin.241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_2013_length_37589_cov_14.315632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738584_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.2013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539469" accession="ERS11141436">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141436</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539469</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.270</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_270_length_133210_cov_14.493206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6458333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_270_length_133210_cov_14.493206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539470" accession="ERS11141437">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141437</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539470</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.4393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.36396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_4393_length_20240_cov_5.028666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_4393_length_20240_cov_5.028666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-465;g__CAG-465;s__CAG-465 sp000433755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_provirus.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539471" accession="ERS11141438">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141438</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539471</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.501</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12878892628021937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.8641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_501_length_98778_cov_38.059999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738631_bin.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_501_length_98778_cov_38.059999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539472" accession="ERS11141439">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141439</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539472</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.652</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_652_length_83643_cov_13.150600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_652_length_83643_cov_13.150600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539473" accession="ERS11141440">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141440</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539473</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738631_virus.872</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738631.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.1103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738631) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561193) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_872_length_69528_cov_38.205008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6590909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_6_1603__NODE_872_length_69528_cov_38.205008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539474" accession="ERS11141441">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141441</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539474</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_provirus.1339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1339_length_56718_cov_5.835049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738633_bin.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.96875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1339_length_56718_cov_5.835049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0239997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539475" accession="ERS11141442">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141442</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539475</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_provirus.1489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus provirus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1489_length_52254_cov_24.068862_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.26666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1489_length_52254_cov_24.068862_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738633_provirus.1489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539477" accession="ERS11141444">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141444</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539477</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_provirus.579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_579_length_102393_cov_8.649947_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738633_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_579_length_102393_cov_8.649947_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0251556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539478" accession="ERS11141445">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141445</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539478</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_provirus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 provirus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>194.998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_80_length_287853_cov_109.143907_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738633_bin.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2916666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_80_length_287853_cov_109.143907_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME261564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;s__Alistipes onderdonkii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738633_provirus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539479" accession="ERS11141446">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141446</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539479</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.1319</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.1358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1319_length_57251_cov_17.680152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1319_length_57251_cov_17.680152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539480" accession="ERS11141447">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141447</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539480</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.1663</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>594.092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1663_length_47739_cov_335.225106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_1663_length_47739_cov_335.225106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides uniformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0335683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539481" accession="ERS11141448">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141448</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539481</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.2133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>893.385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2133_length_38552_cov_516.065367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2133_length_38552_cov_516.065367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539483" accession="ERS11141450">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141450</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539483</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.3008</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>331.437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_3008_length_27982_cov_183.799892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_3008_length_27982_cov_183.799892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539484" accession="ERS11141451">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141451</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539484</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.4132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09677917672702029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.2059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_4132_length_20203_cov_4.401669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_4132_length_20203_cov_4.401669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0266477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539485" accession="ERS11141452">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141452</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539485</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.621</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1762.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_621_length_97849_cov_976.126192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_621_length_97849_cov_976.126192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0378020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539486" accession="ERS11141453">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141453</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539486</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_provirus.1333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.1959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1333_length_57740_cov_32.755805_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738643_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9666666666666668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1333_length_57740_cov_32.755805_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539487" accession="ERS11141454">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141454</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539487</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_provirus.1796</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1796_length_47414_cov_19.238883_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1796_length_47414_cov_19.238883_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides sp900549585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_provirus.1796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539488" accession="ERS11141455">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141455</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539488</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_provirus.296</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_296_length_135861_cov_8.302974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738643_bin.361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_296_length_135861_cov_8.302974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738591_bin.140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Oxalobacter;s__Oxalobacter sp900760095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_provirus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539489" accession="ERS11141456">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141456</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539489</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_provirus.510</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn provirus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0699339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.9742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_510_length_102579_cov_41.683684_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_510_length_102579_cov_41.683684_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539490" accession="ERS11141457">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141457</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539490</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.1058</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1058_length_67268_cov_8.631201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738643_bin.334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5945945945945946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1058_length_67268_cov_8.631201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539492" accession="ERS11141459">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141459</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539492</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.1553</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.1114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1553_length_52281_cov_31.882270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1553_length_52281_cov_31.882270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738643_bin.213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539493" accession="ERS11141460">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141460</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539493</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.1852</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.5667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1852_length_46401_cov_33.103985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1852_length_46401_cov_33.103985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539494" accession="ERS11141461">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141461</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539494</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.1985</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1985_length_44276_cov_6.491663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1985_length_44276_cov_6.491663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Duodenibacillus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539495" accession="ERS11141462">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141462</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539495</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.2110</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2110_length_42443_cov_16.216117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2110_length_42443_cov_16.216117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__MARSEILLE-P3954;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.2110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539496" accession="ERS11141463">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141463</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539496</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.2223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2223_length_40864_cov_7.970334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2223_length_40864_cov_7.970334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_virus.1013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539498" accession="ERS11141465">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141465</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539498</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.248</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_248_length_147750_cov_16.290182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8035714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_248_length_147750_cov_16.290182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539499" accession="ERS11141466">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141466</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539499</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.2705</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2705_length_35901_cov_9.822047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2705_length_35901_cov_9.822047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.2061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539500" accession="ERS11141467">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141467</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539500</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.306</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.9237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_306_length_134273_cov_47.309026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.45714285714285713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_306_length_134273_cov_47.309026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539501" accession="ERS11141468">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141468</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539501</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.3682</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_3682_length_28592_cov_6.198036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_3682_length_28592_cov_6.198036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745594_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__UBA636 sp900546285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.3682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539502" accession="ERS11141469">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141469</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539502</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.4292</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.19539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_4292_length_25259_cov_5.118696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_4292_length_25259_cov_5.118696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.2618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539503" accession="ERS11141470">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141470</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539503</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.4947</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.51528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_4947_length_22530_cov_4.864829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_4947_length_22530_cov_4.864829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.4947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539504" accession="ERS11141471">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141471</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539504</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.578</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>216.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_578_length_93687_cov_127.979853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9302325581395348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_578_length_93687_cov_127.979853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539506" accession="ERS11141473">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141473</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539506</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_provirus.11</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.1405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_11_length_385521_cov_22.112771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738645_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9655172413793104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_11_length_385521_cov_22.112771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Ligilactobacillus;s__Ligilactobacillus ruminis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738645_provirus.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539507" accession="ERS11141474">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141474</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539507</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_provirus.423</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_423_length_64776_cov_6.185536_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738645_bin.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_423_length_64776_cov_6.185536_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0267854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539508" accession="ERS11141475">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141475</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539508</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_virus.1230</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06390472738781326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:17Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.43836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_1230_length_29135_cov_4.250981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_1230_length_29135_cov_4.250981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0236306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539509" accession="ERS11141476">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141476</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539509</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_virus.309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.3828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_309_length_79927_cov_24.238610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_309_length_79927_cov_24.238610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738645_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539510" accession="ERS11141477">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141477</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539510</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_virus.575</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_575_length_53357_cov_8.901802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7878787878787878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_575_length_53357_cov_8.901802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0321081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539512" accession="ERS11141479">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141479</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539512</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_provirus.105</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08997797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.3128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_105_length_189621_cov_16.281133_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738651_bin.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>48.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_105_length_189621_cov_16.281133_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-103;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_provirus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539513" accession="ERS11141480">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141480</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539513</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_provirus.1930</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15001018949221068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1930_length_44500_cov_12.667154_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738651_bin.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1930_length_44500_cov_12.667154_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.1673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539514" accession="ERS11141481">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141481</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539514</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_provirus.3388</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24217479228380095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.9233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_3388_length_29636_cov_26.713996_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_3388_length_29636_cov_26.713996_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_provirus.3388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539515" accession="ERS11141482">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141482</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539515</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_provirus.4360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4360_length_24483_cov_6.034049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4360_length_24483_cov_6.034049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Slackia_A;s__Slackia_A isoflavoniconvertens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539516" accession="ERS11141483">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141483</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539516</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_provirus.911</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.2684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_911_length_69980_cov_27.794530_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738651_bin.264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_911_length_69980_cov_27.794530_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.1947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539517" accession="ERS11141484">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141484</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539517</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.1157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>213.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1157_length_61557_cov_128.232824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1157_length_61557_cov_128.232824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME157359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;s__UBA3388 sp900545215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539518" accession="ERS11141485">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141485</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539518</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.1469</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.3814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1469_length_53146_cov_32.849291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42105263157894735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1469_length_53146_cov_32.849291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539519" accession="ERS11141486">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141486</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539519</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.1737</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1737_length_47895_cov_24.973420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_1737_length_47895_cov_24.973420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539521" accession="ERS11141488">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141488</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539521</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.2313</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2313_length_39168_cov_33.746898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2313_length_39168_cov_33.746898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_virus.650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539522" accession="ERS11141489">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141489</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539522</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.2665</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.4174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2665_length_35594_cov_45.014219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2665_length_35594_cov_45.014219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME026639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900548625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.2665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539523" accession="ERS11141490">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141490</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539523</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.2917</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2917_length_33367_cov_7.879003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9032258064516128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2917_length_33367_cov_7.879003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.2917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539524" accession="ERS11141491">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141491</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539524</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5255538234328847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.2469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_331_length_116978_cov_32.639781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8985507246376812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_331_length_116978_cov_32.639781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539525" accession="ERS11141492">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141492</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539525</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.4081</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04730176211453744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.03181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4081_length_25710_cov_5.463348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4081_length_25710_cov_5.463348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.4081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539527" accession="ERS11141494">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141494</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539527</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.5165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02425715029530222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.90661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_5165_length_21363_cov_5.356525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_5165_length_21363_cov_5.356525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539528" accession="ERS11141495">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141495</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539528</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.6999</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04805480131144586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>473.717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_6999_length_16637_cov_290.403744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_6999_length_16637_cov_290.403744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_provirus.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539529" accession="ERS11141496">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141496</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539529</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738653_provirus.102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738653.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.9288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738653) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567268) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_102_length_101035_cov_22.336774_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6792452830188679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_102_length_101035_cov_22.336774_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738653_provirus.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539530" accession="ERS11141497">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141497</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539530</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738653_provirus.317</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738653.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.86532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738653) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567268) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_317_length_57828_cov_4.737979_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_317_length_57828_cov_4.737979_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738564_virus.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539531" accession="ERS11141498">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141498</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539531</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738653_provirus.781</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738653.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.04486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738653) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567268) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_781_length_30757_cov_4.789276_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738653_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_781_length_30757_cov_4.789276_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738653_provirus.781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539532" accession="ERS11141499">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141499</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539532</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738653_virus.324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738653.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738653) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567268) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_324_length_56949_cov_26.568276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_5_RAU0022YZ__NODE_324_length_56949_cov_26.568276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738653_virus.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539534" accession="ERS11141501">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141501</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539534</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18308091782530816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.1762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_161_length_178384_cov_30.953900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_161_length_178384_cov_30.953900_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME273772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__RUG841;s__RUG841 sp900313795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_provirus.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539535" accession="ERS11141502">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141502</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539535</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.224</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05883159685814017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_224_length_158995_cov_8.481437_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738657_bin.318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_224_length_158995_cov_8.481437_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_provirus.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539536" accession="ERS11141503">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141503</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539536</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.3025</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.3553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3025_length_37575_cov_5.435970_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>48.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3025_length_37575_cov_5.435970_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745414_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900768625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_provirus.348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539537" accession="ERS11141504">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141504</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539537</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.2814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_494_length_112167_cov_32.878357_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738657_bin.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_494_length_112167_cov_32.878357_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738230_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Fibrobacterota;c__Fibrobacteria;o__Fibrobacterales;f__Fibrobacteraceae;g__Fibrobacter_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745419_virus.1042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539538" accession="ERS11141505">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141505</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539538</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.2984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_811_length_87042_cov_28.584120_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738657_bin.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_811_length_87042_cov_28.584120_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_provirus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539540" accession="ERS11141507">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141507</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539540</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.1378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1378_length_63830_cov_8.117955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1378_length_63830_cov_8.117955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738630_bin.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFFH01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.1378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539541" accession="ERS11141508">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141508</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539541</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.1636</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.5349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1636_length_57143_cov_35.952984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9767441860465116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1636_length_57143_cov_35.952984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.1636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539542" accession="ERS11141509">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141509</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539542</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.1862</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.827477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.3458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1862_length_53217_cov_21.131916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1862_length_53217_cov_21.131916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539543" accession="ERS11141510">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141510</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539543</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.2548</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.7334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_2548_length_42693_cov_5.352661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_2548_length_42693_cov_5.352661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745858_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Anaerotignaceae;g__UMGS1670;s__UMGS1670 sp900553995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.2548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539544" accession="ERS11141511">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141511</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539544</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.2843</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07725770925110134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.05333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_2843_length_39512_cov_4.043667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738657_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_2843_length_39512_cov_4.043667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-977;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539545" accession="ERS11141512">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141512</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539545</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.3095</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.3913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3095_length_36910_cov_29.292699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3095_length_36910_cov_29.292699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME110466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900546925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.3095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539546" accession="ERS11141513">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141513</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539546</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.3508</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.77967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3508_length_33632_cov_5.500611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_3508_length_33632_cov_5.500611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.3508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539547" accession="ERS11141514">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141514</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539547</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.4120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.864977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_4120_length_29702_cov_16.757401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_4120_length_29702_cov_16.757401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539548" accession="ERS11141515">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141515</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539548</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.4808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.86894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_4808_length_26345_cov_4.310644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_4808_length_26345_cov_4.310644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_virus.1413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539550" accession="ERS11141517">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141517</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539550</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.6222</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_6222_length_21251_cov_7.474922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_6222_length_21251_cov_7.474922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_provirus.717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539551" accession="ERS11141518">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141518</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539551</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.755</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.3589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_755_length_90440_cov_50.271062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6388888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_755_length_90440_cov_50.271062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539552" accession="ERS11141519">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141519</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539552</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.9088</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11722655367650484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.8333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_9088_length_15541_cov_14.475168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_9088_length_15541_cov_14.475168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.9088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539553" accession="ERS11141520">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141520</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539553</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_provirus.126</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1998898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.0418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_126_length_183778_cov_32.280929_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738659_bin.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_126_length_183778_cov_32.280929_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539554" accession="ERS11141521">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141521</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539554</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_provirus.1817</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.5398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1817_length_44429_cov_43.426565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1817_length_44429_cov_43.426565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539556" accession="ERS11141523">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141523</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539556</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_provirus.522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06862117793506581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.9425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_522_length_91361_cov_17.956849_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_522_length_91361_cov_17.956849_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539557" accession="ERS11141524">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141524</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539557</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_provirus.894</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06321814991225902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_894_length_69593_cov_14.694962_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_894_length_69593_cov_14.694962_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_provirus.810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539558" accession="ERS11141525">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141525</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539558</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.1155</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1155_length_58649_cov_45.401079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1155_length_58649_cov_45.401079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745384_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900553155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.1155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539559" accession="ERS11141526">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141526</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539559</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.1769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.8593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1769_length_45026_cov_15.244477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738659_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_1769_length_45026_cov_15.244477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME277835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__CAG-269 sp000437215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.1769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539560" accession="ERS11141527">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141527</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539560</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.2129</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.3411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_2129_length_39951_cov_37.847821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_2129_length_39951_cov_37.847821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746051_bin.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger variabilis_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738221_virus.807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539561" accession="ERS11141528">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141528</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539561</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.2602</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>240.086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_2602_length_34482_cov_134.511554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_2602_length_34482_cov_134.511554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539562" accession="ERS11141529">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141529</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539562</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.3090</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0322136563876652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_3090_length_30241_cov_11.263824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738659_bin.147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_3090_length_30241_cov_11.263824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.3090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539564" accession="ERS11141531">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141531</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539564</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.5572</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0574339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.61145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_5572_length_18706_cov_4.172044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738659_bin.146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_5572_length_18706_cov_4.172044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.5572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539565" accession="ERS11141532">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141532</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539565</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.9032</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.5811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_9032_length_12246_cov_3.533733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_9032_length_12246_cov_3.533733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.2423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539566" accession="ERS11141533">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141533</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539566</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_provirus.1402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1402_length_52897_cov_7.473911_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738660_bin.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1402_length_52897_cov_7.473911_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_provirus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539567" accession="ERS11141534">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141534</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539567</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_provirus.1915</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0998898678414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1915_length_43654_cov_12.358974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1915_length_43654_cov_12.358974_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_virus.1263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539568" accession="ERS11141535">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141535</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539568</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_provirus.2895</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2895_length_33994_cov_7.839225_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2895_length_33994_cov_7.839225_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_provirus.583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539570" accession="ERS11141537">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141537</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539570</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_provirus.837</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_837_length_70477_cov_11.620810_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_837_length_70477_cov_11.620810_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539571" accession="ERS11141538">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141538</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539571</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.1164</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.71874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1164_length_58633_cov_5.503364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1164_length_58633_cov_5.503364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539572" accession="ERS11141539">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141539</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539572</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.1600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1600_length_48767_cov_8.439433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1600_length_48767_cov_8.439433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539573" accession="ERS11141540">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141540</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539573</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.1988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1988_length_42622_cov_7.104689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_1988_length_42622_cov_7.104689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_virus.1988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539574" accession="ERS11141541">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141541</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539574</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.2406</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2406_length_38044_cov_22.647036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2406_length_38044_cov_22.647036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738660_virus.2406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539575" accession="ERS11141542">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141542</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539575</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.2658</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01495011113590989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.8605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2658_length_35853_cov_19.593806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_2658_length_35853_cov_19.593806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539576" accession="ERS11141543">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141543</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539576</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.3387</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_3387_length_30790_cov_7.254355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_3387_length_30790_cov_7.254355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_provirus.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539577" accession="ERS11141544">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141544</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539577</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.3956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.9323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_3956_length_27863_cov_18.091845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_3956_length_27863_cov_18.091845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539395" accession="ERS11141363">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141363</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539395</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738626_virus.783</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7738626.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>264.427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738626) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560783) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_783_length_95353_cov_149.218250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.967741935483871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_17_1727__NODE_783_length_95353_cov_149.218250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539476" accession="ERS11141443">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141443</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539476</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_provirus.2187</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:15Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.1396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2187_length_37837_cov_37.214619_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738633_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2187_length_37837_cov_37.214619_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides cellulosilyticus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738549_virus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539482" accession="ERS11141449">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141449</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539482</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738633_virus.2444</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738633.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>301.851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738633) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558881) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2444_length_34072_cov_171.826857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber-Hadza-Nepal_M_3_7038__NODE_2444_length_34072_cov_171.826857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539491" accession="ERS11141458">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141458</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539491</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.1402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.5135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1402_length_55740_cov_27.818964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_1402_length_55740_cov_27.818964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539497" accession="ERS11141464">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141464</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539497</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.2383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04988986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2383_length_38988_cov_11.314436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_2383_length_38988_cov_11.314436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME041878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__CAG-1024;s__CAG-1024 sp000432015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_virus.898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539505" accession="ERS11141472">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141472</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539505</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738643_virus.7467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738643.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.45121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738643) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560103) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_7467_length_16167_cov_4.642014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738643_bin.310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_21_2150__NODE_7467_length_16167_cov_4.642014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.7467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539511" accession="ERS11141478">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141478</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539511</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738645_virus.834</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738645.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T00:30:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.9205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738645) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567408) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_834_length_39486_cov_24.848588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_7_THA0058AZ__NODE_834_length_39486_cov_24.848588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0288732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539520" accession="ERS11141487">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141487</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539520</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.2027</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>403.312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2027_length_42937_cov_247.536771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_2027_length_42937_cov_247.536771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.2027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539526" accession="ERS11141493">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141493</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539526</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738651_virus.4565</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738651.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1931310196624605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.42055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738651) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560070) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4565_length_23636_cov_5.582622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_9_2144__NODE_4565_length_23636_cov_5.582622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.4565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539533" accession="ERS11141500">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141500</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539533</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_provirus.1060</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1060_length_75272_cov_15.740049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_1060_length_75272_cov_15.740049_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0357521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539539" accession="ERS11141506">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141506</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539539</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.10911</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_10911_length_13259_cov_5.962828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_10911_length_13259_cov_5.962828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.10911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539549" accession="ERS11141516">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141516</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539549</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738657_virus.5624</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738657.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.42707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738657) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_5624_length_23165_cov_4.119499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_5_1733__NODE_5624_length_23165_cov_4.119499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0370468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539555" accession="ERS11141522">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141522</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539555</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_provirus.326</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6799559471365638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>157.343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_326_length_116635_cov_91.101735_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_326_length_116635_cov_91.101735_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539563" accession="ERS11141530">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141530</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539563</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738659_virus.4373</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738659.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738659) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560611) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_4373_length_22864_cov_6.396717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_22_1650__NODE_4373_length_22864_cov_6.396717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME171351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Veillonellaceae;g__Veillonella;s__Veillonella parvula_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.4373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539569" accession="ERS11141536">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141536</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539569</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_provirus.4003</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08210352422907494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.47235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_4003_length_27621_cov_4.697611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_4003_length_27621_cov_4.697611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_provirus.1432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539578" accession="ERS11141545">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141545</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539578</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.4884</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0932209057437508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.6942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_4884_length_24198_cov_20.023672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_4884_length_24198_cov_20.023672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.1786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539579" accession="ERS11141546">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141546</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539579</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.6524</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.21619739868756788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_6524_length_19907_cov_13.878719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_6524_length_19907_cov_13.878719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539580" accession="ERS11141547">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141547</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539580</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738660_virus.8364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.7548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567818) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_8364_length_16734_cov_30.644294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_8_1525__NODE_8364_length_16734_cov_30.644294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.4855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539581" accession="ERS11141548">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141548</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539581</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_provirus.1525</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.5184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1525_length_61350_cov_12.835115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1525_length_61350_cov_12.835115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738579_provirus.1307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539582" accession="ERS11141549">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141549</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539582</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_provirus.2343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:10Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2343_length_46388_cov_13.547386_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738661_bin.10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2343_length_46388_cov_13.547386_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746398_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;s__Oribacterium sp900772695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_provirus.2343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539583" accession="ERS11141550">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141550</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539583</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_provirus.3120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3120_length_37039_cov_7.864509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3120_length_37039_cov_7.864509_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D bicirculans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_provirus.3120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539584" accession="ERS11141551">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141551</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539584</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_provirus.603</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>445.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_603_length_110119_cov_258.126215_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738661_bin.218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_603_length_110119_cov_258.126215_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.1446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539585" accession="ERS11141552">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141552</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539585</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.1202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1202_length_72388_cov_15.287439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1202_length_72388_cov_15.287439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.1202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539586" accession="ERS11141553">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141553</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539586</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.1429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.1278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1429_length_64160_cov_42.792254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9736842105263158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1429_length_64160_cov_42.792254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.1429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539587" accession="ERS11141554">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141554</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539587</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.1640</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>710.091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1640_length_58735_cov_414.649715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9722222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1640_length_58735_cov_414.649715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.1640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539588" accession="ERS11141555">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141555</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539588</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.1864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.1844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1864_length_53952_cov_58.621698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_1864_length_53952_cov_58.621698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738207_virus.383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539589" accession="ERS11141556">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141556</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539589</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.2094</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17129856875493005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>112.639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2094_length_50292_cov_65.716698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2094_length_50292_cov_65.716698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.2094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539590" accession="ERS11141557">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141557</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539590</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1820121253599783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.9305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_240_length_169652_cov_24.171269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_240_length_169652_cov_24.171269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539591" accession="ERS11141558">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141558</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539591</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.2517</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2517_length_43834_cov_10.288845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2517_length_43834_cov_10.288845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.2245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539592" accession="ERS11141559">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141559</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539592</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.2654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.1884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2654_length_41917_cov_21.127749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2654_length_41917_cov_21.127749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.2654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539593" accession="ERS11141560">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141560</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539593</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.2830</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2830_length_40096_cov_15.109798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2830_length_40096_cov_15.109798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.2830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539594" accession="ERS11141561">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141561</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539594</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.2998</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.7882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2998_length_38172_cov_19.994619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_2998_length_38172_cov_19.994619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.2998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539595" accession="ERS11141562">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141562</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539595</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.3203</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.05689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3203_length_36166_cov_4.519244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3203_length_36166_cov_4.519244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738553_bin.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__UBA1067;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.3203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539596" accession="ERS11141563">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141563</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539596</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.3723</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pbunavirus virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11701541850220272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>201.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3723_length_32109_cov_117.807224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_3723_length_32109_cov_117.807224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.3723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Pbunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539597" accession="ERS11141564">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141564</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539597</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.430</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1434.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_430_length_131134_cov_825.191298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_430_length_131134_cov_825.191298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539598" accession="ERS11141565">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141565</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539598</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.49</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.8499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_49_length_318951_cov_22.299877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4189189189189189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_49_length_318951_cov_22.299877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746349_bin.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-95;s__CAG-95 sp000436115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539599" accession="ERS11141566">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141566</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539599</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.5768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1389047273878133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.2754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_5768_length_21960_cov_8.851254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738661_bin.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_5768_length_21960_cov_8.851254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.5768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539600" accession="ERS11141567">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141567</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539600</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.696</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.5313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_696_length_99931_cov_32.971508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_696_length_99931_cov_32.971508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738203_virus.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539601" accession="ERS11141568">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141568</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539601</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738661_virus.832</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738661.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738661) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_832_length_89973_cov_13.829025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_7_2143__NODE_832_length_89973_cov_13.829025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539602" accession="ERS11141569">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141569</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539602</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_provirus.1016</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05938930888561059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.4339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1016_length_64100_cov_24.837449_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5897435897435898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1016_length_64100_cov_24.837449_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539603" accession="ERS11141570">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141570</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539603</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_provirus.220</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>290.812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_220_length_169769_cov_180.897343_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738662_bin.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_220_length_169769_cov_180.897343_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0341270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539604" accession="ERS11141571">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141571</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539604</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_provirus.469</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>877.444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_469_length_114623_cov_520.939483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_469_length_114623_cov_520.939483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0354100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539605" accession="ERS11141572">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141572</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539605</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_provirus.797</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.2215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_797_length_78610_cov_51.147034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6410256410256411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_797_length_78610_cov_51.147034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0313669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539606" accession="ERS11141573">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141573</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539606</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_virus.1498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.7704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1498_length_45207_cov_29.822358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1498_length_45207_cov_29.822358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738662_virus.1498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539607" accession="ERS11141574">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141574</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539607</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_virus.1855</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu virus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1855_length_36803_cov_7.826281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_1855_length_36803_cov_7.826281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539608" accession="ERS11141575">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141575</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539608</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_virus.3333</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03986784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.59727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_3333_length_19828_cov_3.653638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_3333_length_19828_cov_3.653638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME158963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UMGS856;s__UMGS856 sp900546265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738662_virus.3333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539609" accession="ERS11141576">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141576</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539609</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738662_virus.972</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738662.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738662) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567250) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_972_length_66412_cov_20.951413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_3_RAJ1020YZ__NODE_972_length_66412_cov_20.951413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738662_virus.972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539610" accession="ERS11141577">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141577</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539610</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_provirus.196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_196_length_149285_cov_17.707777_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_196_length_149285_cov_17.707777_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539611" accession="ERS11141578">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141578</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539611</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_provirus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_351_length_115285_cov_11.727042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738664_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_351_length_115285_cov_11.727042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539612" accession="ERS11141579">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141579</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539612</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_provirus.512</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19367550097940467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_512_length_98705_cov_6.261711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738664_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_512_length_98705_cov_6.261711_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738193_provirus.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539613" accession="ERS11141580">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141580</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539613</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_provirus.851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1621696035242291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>144.068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_851_length_75820_cov_83.597243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_851_length_75820_cov_83.597243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_provirus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539614" accession="ERS11141581">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141581</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539614</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.1147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>367.802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_1147_length_63032_cov_213.025034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9210526315789472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_1147_length_63032_cov_213.025034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_virus.594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539615" accession="ERS11141582">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141582</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539615</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.1724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_1724_length_48434_cov_7.274583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738664_bin.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_1724_length_48434_cov_7.274583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.1527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539616" accession="ERS11141583">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141583</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539616</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.2071</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.7881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2071_length_42316_cov_16.946092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2071_length_42316_cov_16.946092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.2071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539617" accession="ERS11141584">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141584</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539617</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.2375</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.1933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2375_length_38317_cov_21.651464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2375_length_38317_cov_21.651464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539618" accession="ERS11141585">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141585</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539618</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.2798</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.8733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2798_length_33989_cov_20.744309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_2798_length_33989_cov_20.744309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_virus.1252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539619" accession="ERS11141586">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141586</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539619</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.428</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.7491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_428_length_106650_cov_20.271542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_428_length_106650_cov_20.271542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_virus.428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539620" accession="ERS11141587">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141587</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539620</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738664_virus.647</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738664.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>137.787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738664) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559409) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_647_length_87174_cov_80.318289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_16_2625__NODE_647_length_87174_cov_80.318289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_virus.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539621" accession="ERS11141588">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141588</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539621</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.1234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.8469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1234_length_80781_cov_71.516368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7045454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1234_length_80781_cov_71.516368_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539622" accession="ERS11141589">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141589</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539622</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.1573</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:09Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.3197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1573_length_68925_cov_14.380026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9069767441860463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1573_length_68925_cov_14.380026_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_provirus.1573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539623" accession="ERS11141590">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141590</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539623</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.200</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18997797356828192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.9225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_200_length_202183_cov_25.621802_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_200_length_202183_cov_25.621802_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0348753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539624" accession="ERS11141591">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141591</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539624</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.2476</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.39021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2476_length_49159_cov_5.484577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2476_length_49159_cov_5.484577_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME121515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-239;g__51-20;s__51-20 sp001917175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0244827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539625" accession="ERS11141592">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141592</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539625</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.384</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>446.319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_384_length_152183_cov_265.505417_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_384_length_152183_cov_265.505417_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__Phil1;s__Phil1 sp001940855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539626" accession="ERS11141593">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141593</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539626</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_547_length_128425_cov_10.800355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_547_length_128425_cov_10.800355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides merdae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_provirus.547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539627" accession="ERS11141594">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141594</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539627</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_provirus.878</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.2967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_878_length_98088_cov_20.774566_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_878_length_98088_cov_20.774566_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__Mediterraneibacter torques</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_provirus.878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539628" accession="ERS11141595">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141595</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539628</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.1162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phage DP SC_6_H4_2017 virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>290.828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1162_length_83070_cov_170.444495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_1162_length_83070_cov_170.444495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME258505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0356247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Phage DP SC_6_H4_2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539629" accession="ERS11141596">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141596</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539629</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.2054</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>254.111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2054_length_56731_cov_150.924295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2054_length_56731_cov_150.924295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_virus.1270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539630" accession="ERS11141597">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141597</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539630</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.2421</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2421_length_50040_cov_15.464784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2421_length_50040_cov_15.464784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900545925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738546_virus.621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539631" accession="ERS11141598">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141598</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539631</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.2739</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.3896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2739_length_45235_cov_27.765712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6129032258064516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_2739_length_45235_cov_27.765712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Fournierella;s__Fournierella sp900543285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0311266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539632" accession="ERS11141599">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141599</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539632</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.3308</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_3308_length_38776_cov_7.237551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_3308_length_38776_cov_7.237551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME114842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS731;s__UMGS731 sp900544985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_virus.3308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539633" accession="ERS11141600">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141600</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539633</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.3559</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19672273990657685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>349.472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_3559_length_36459_cov_207.029080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_3559_length_36459_cov_207.029080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes_A;s__Alistipes_A ihumii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0269636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539634" accession="ERS11141601">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141601</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539634</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.4389</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04666904456842997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.0187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_4389_length_30309_cov_4.627779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_4389_length_30309_cov_4.627779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_virus.4389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539635" accession="ERS11141602">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141602</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539635</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.4998</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11488212098404647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.1971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_4998_length_26861_cov_33.150314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_4998_length_26861_cov_33.150314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_virus.4998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539636" accession="ERS11141603">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141603</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539636</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.5677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09741189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_5677_length_23829_cov_13.481138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_5677_length_23829_cov_13.481138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0277841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539637" accession="ERS11141604">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141604</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539637</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.835</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microcystis phage MaAM05 virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>216.667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_835_length_101059_cov_128.793498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_835_length_101059_cov_128.793498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME017739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Lentisphaeria;o__Victivallales;f__UBA1829;g__UBA11452;s__UBA11452 sp003526375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738665_virus.835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Microcystis phage MaAM05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539638" accession="ERS11141605">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141605</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539638</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738665_virus.961</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738665.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.2589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738665) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_961_length_92955_cov_26.900138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738665_bin.244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9384027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_I_11_1003__NODE_961_length_92955_cov_26.900138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539639" accession="ERS11141606">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141606</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539639</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_provirus.1732</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.63249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1732_length_41801_cov_3.538563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1732_length_41801_cov_3.538563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Senegalimassilia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_provirus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539640" accession="ERS11141607">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141607</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539640</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_provirus.480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>145.009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_480_length_89518_cov_83.049183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738936_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_480_length_89518_cov_83.049183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539641" accession="ERS11141608">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141608</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539641</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_provirus.800</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_800_length_68039_cov_8.156264_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_800_length_68039_cov_8.156264_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539642" accession="ERS11141609">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141609</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539642</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.1123</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03358896201113708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>648.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1123_length_56092_cov_412.690262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1123_length_56092_cov_412.690262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.1537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539643" accession="ERS11141610">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141610</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539643</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.1502</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1502_length_46489_cov_57.357645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4230769230769231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1502_length_46489_cov_57.357645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.1285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539644" accession="ERS11141611">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141611</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539644</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.1844</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.51927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1844_length_40170_cov_4.927768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_1844_length_40170_cov_4.927768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.1844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539645" accession="ERS11141612">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141612</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539645</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.2012</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16497797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2012_length_37999_cov_8.553267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2012_length_37999_cov_8.553267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Mitsuokella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.2012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539646" accession="ERS11141613">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141613</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539646</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.2303</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01110955218654152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2303_length_34932_cov_5.979142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2303_length_34932_cov_5.979142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-273;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.2303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539647" accession="ERS11141614">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141614</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539647</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.2592</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2592_length_32198_cov_9.016967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_2592_length_32198_cov_9.016967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.2185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539648" accession="ERS11141615">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141615</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539648</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.3617</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07047260194774403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.0052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_3617_length_25374_cov_17.823932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_3617_length_25374_cov_17.823932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_virus.2347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539649" accession="ERS11141616">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141616</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539649</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.4207</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.40776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_4207_length_22668_cov_4.112965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_4207_length_22668_cov_4.112965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738561_bin.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900066305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.4207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539650" accession="ERS11141617">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141617</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539650</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.55</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_55_length_212516_cov_11.165426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5471698113207547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_55_length_212516_cov_11.165426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-239;g__RUG410;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539651" accession="ERS11141618">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141618</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539651</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.7258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.8955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_7258_length_15082_cov_38.048117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_7258_length_15082_cov_38.048117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__Dorea formicigenerans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.7258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539652" accession="ERS11141619">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141619</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539652</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738936_virus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738936.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738936) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560332) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_850_length_66179_cov_8.026202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_19_1739__NODE_850_length_66179_cov_8.026202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Duodenibacillus;s__Duodenibacillus sp900544255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539653" accession="ERS11141620">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141620</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539653</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_provirus.1047</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1047_length_74371_cov_8.903465_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738937_bin.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1047_length_74371_cov_8.903465_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__UBA7182;s__UBA7182 sp003480725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738937_provirus.1047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539654" accession="ERS11141621">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141621</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539654</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_provirus.1290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.6157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1290_length_63374_cov_42.468190_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738937_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1290_length_63374_cov_42.468190_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp003491505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_provirus.475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539655" accession="ERS11141622">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141622</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539655</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_provirus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_272_length_157166_cov_38.515491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_272_length_157166_cov_38.515491_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Bilophila;s__Bilophila wadsworthia</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_provirus.1840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539656" accession="ERS11141623">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141623</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539656</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_provirus.48</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>169.389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_48_length_323305_cov_97.463441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.978021978021978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_48_length_323305_cov_97.463441_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0371292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539657" accession="ERS11141624">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141624</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539657</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_provirus.878</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>8.6317135549872e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_878_length_83818_cov_48.675619_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_878_length_83818_cov_48.675619_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__UMGS1375;s__UMGS1375 sp900066615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_provirus.623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539658" accession="ERS11141625">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141625</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539658</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_virus.1797</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.7202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1797_length_47890_cov_15.670069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6944444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_1797_length_47890_cov_15.670069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539659" accession="ERS11141626">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141626</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539659</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_virus.2060</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.7209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_2060_length_42982_cov_18.326023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_2060_length_42982_cov_18.326023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738276_virus.1589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539660" accession="ERS11141627">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141627</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539660</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_virus.2490</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.9909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_2490_length_36605_cov_27.509335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_2490_length_36605_cov_27.509335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0251436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539661" accession="ERS11141628">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141628</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539661</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738937_virus.4339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17433920704845826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4558872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_4339_length_21592_cov_8.693098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>USA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>37.4275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-122.1697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1016/j.cell.2021.06.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>SRS9383899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Pilot_MoBio_Fiber_L_20_7018__NODE_4339_length_21592_cov_8.693098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738937_virus.4339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539662" accession="ERS11141629">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141629</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539662</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_provirus.1144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.17695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1144_length_50566_cov_4.247658_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738938_bin.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1144_length_50566_cov_4.247658_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_provirus.1144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539663" accession="ERS11141630">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141630</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539663</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_provirus.3443</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus provirus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06557860569609461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.05502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_3443_length_21852_cov_4.273203_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_3443_length_21852_cov_4.273203_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_provirus.3443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539664" accession="ERS11141631">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141631</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539664</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_provirus.930</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.1415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_930_length_57738_cov_30.729644_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_930_length_57738_cov_30.729644_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539665" accession="ERS11141632">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141632</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539665</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.1384</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>225.438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1384_length_43976_cov_129.139320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1384_length_43976_cov_129.139320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.1384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539666" accession="ERS11141633">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141633</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539666</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.1576</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1576_length_40278_cov_10.662745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1576_length_40278_cov_10.662745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__CAG-238;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.1576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539667" accession="ERS11141634">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141634</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539667</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.1623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25243631530483024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1623_length_39422_cov_6.228771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_1623_length_39422_cov_6.228771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.1623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539668" accession="ERS11141635">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141635</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539668</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Enterococcus phage EF5 virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04343334073279702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.4655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_179_length_139821_cov_46.192581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9294871794871796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_179_length_139821_cov_46.192581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Enterococcus phage EF5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539669" accession="ERS11141636">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141636</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539669</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_202_length_131027_cov_6.033501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43243243243243246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_202_length_131027_cov_6.033501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_virus.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539670" accession="ERS11141637">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141637</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539670</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.2519</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.30247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.9024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_2519_length_28472_cov_23.288678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_2519_length_28472_cov_23.288678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539671" accession="ERS11141638">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141638</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539671</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.2847</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06442731277533043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.7257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_2847_length_25622_cov_5.280211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_2847_length_25622_cov_5.280211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.1842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539672" accession="ERS11141639">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141639</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539672</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.3497</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Clostridium phage vB_CpeS-1181 virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08986784140969165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_3497_length_21559_cov_8.037147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738938_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_3497_length_21559_cov_8.037147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P perfringens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.3497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Clostridium phage vB_CpeS-1181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539673" accession="ERS11141640">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141640</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539673</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.404</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.38586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_404_length_91872_cov_4.455537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8205128205128205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_404_length_91872_cov_4.455537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539674" accession="ERS11141641">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141641</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539674</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.598</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Maenadvirus virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>227.448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_598_length_73945_cov_129.966291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.845360824742268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_598_length_73945_cov_129.966291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738260_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactiplantibacillus;s__Lactiplantibacillus plantarum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738277_virus.252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Tybeckvirinae; Maenadvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539675" accession="ERS11141642">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141642</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539675</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738938_virus.921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738938.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05236784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>226.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738938) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567102) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_921_length_57940_cov_130.426715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_9_CHE0002SZ__NODE_921_length_57940_cov_130.426715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738938_virus.921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539676" accession="ERS11141643">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141643</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539676</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_provirus.149</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.8004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_149_length_161677_cov_33.858676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738957_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_149_length_161677_cov_33.858676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_provirus.431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539677" accession="ERS11141644">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141644</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539677</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_provirus.2196</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2196_length_41925_cov_9.357556_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738957_bin.360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2196_length_41925_cov_9.357556_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_provirus.1362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539678" accession="ERS11141645">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141645</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539678</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_provirus.332</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_332_length_114927_cov_12.889273_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4230769230769231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_332_length_114927_cov_12.889273_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_provirus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539679" accession="ERS11141646">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141646</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539679</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.7221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_60_length_225080_cov_36.183389_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738957_bin.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_60_length_225080_cov_36.183389_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539680" accession="ERS11141647">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141647</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539680</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_provirus.914</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.6912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_914_length_69922_cov_23.922600_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_914_length_69922_cov_23.922600_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539681" accession="ERS11141648">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141648</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539681</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Krischvirus virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.5433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_133_length_170505_cov_20.882625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6346863468634686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_133_length_170505_cov_20.882625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_virus.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tevenvirinae; Krischvirus; unclassified Krischvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539682" accession="ERS11141649">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141649</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539682</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.1633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_1633_length_50948_cov_27.241395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_1633_length_50948_cov_27.241395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.1633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539683" accession="ERS11141650">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141650</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539683</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.1917</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>120.098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_1917_length_46025_cov_76.011665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_1917_length_46025_cov_76.011665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_virus.884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539684" accession="ERS11141651">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141651</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539684</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.2397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2397_length_39787_cov_8.950844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2397_length_39787_cov_8.950844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME232176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Enterobacter;s__Enterobacter hormaechei_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.2397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539685" accession="ERS11141652">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141652</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539685</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.2888</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2888_length_35207_cov_14.901139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738957_bin.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_2888_length_35207_cov_14.901139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539686" accession="ERS11141653">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141653</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539686</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.371</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_371_length_109145_cov_11.023279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_371_length_109145_cov_11.023279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME276106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900548615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539687" accession="ERS11141654">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141654</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539687</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.4647</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.2005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_4647_length_25116_cov_23.871441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_4647_length_25116_cov_23.871441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746365_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_provirus.3444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539688" accession="ERS11141655">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141655</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539688</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.5936</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.39864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_5936_length_20800_cov_5.659123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738957_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_5936_length_20800_cov_5.659123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__Mediterraneibacter torques</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.2867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539689" accession="ERS11141656">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141656</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539689</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738957_virus.8959</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738957.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.96363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738957) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561272) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_8959_length_15244_cov_5.360094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_2_1628__NODE_8959_length_15244_cov_5.360094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.8959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539690" accession="ERS11141657">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141657</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539690</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.9048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_119_length_211621_cov_22.881519_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_119_length_211621_cov_22.881519_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738241_bin.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA737;s__UBA737 sp900549055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539691" accession="ERS11141658">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141658</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539691</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.176</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_176_length_182345_cov_19.061700_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_176_length_182345_cov_19.061700_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539692" accession="ERS11141659">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141659</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539692</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.24</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.0137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_24_length_385624_cov_38.964106_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9142857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_24_length_385624_cov_38.964106_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.2420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539693" accession="ERS11141660">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141660</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539693</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.095911314301079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_288_length_143254_cov_9.553560_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5757575757575758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_288_length_143254_cov_9.553560_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_G;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539694" accession="ERS11141661">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141661</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539694</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>135.866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_396_length_124280_cov_89.101946_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9302325581395348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_396_length_124280_cov_89.101946_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738232_provirus.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539695" accession="ERS11141662">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141662</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539695</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.537</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.0227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_537_length_109571_cov_15.613760_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3157894736842105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_537_length_109571_cov_15.613760_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio;s__Desulfovibrio piger</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_provirus.537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539696" accession="ERS11141663">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141663</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539696</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_provirus.884</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>184.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_884_length_85684_cov_110.276671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5806451612903226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_884_length_85684_cov_110.276671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_provirus.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539697" accession="ERS11141664">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141664</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539697</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.12161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0322136563876652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_12161_length_13440_cov_6.923356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_12161_length_13440_cov_6.923356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.12161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539698" accession="ERS11141665">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141665</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539698</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.16291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_16291_length_10493_cov_26.432604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_16291_length_10493_cov_26.432604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.4536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539699" accession="ERS11141666">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141666</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539699</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.2105</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>176.516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2105_length_50724_cov_103.975576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2105_length_50724_cov_103.975576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539700" accession="ERS11141667">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141667</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539700</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.2423</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.51484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2423_length_46687_cov_5.361982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2423_length_46687_cov_5.361982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.2423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539701" accession="ERS11141668">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141668</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539701</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.2740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2740_length_43158_cov_9.779392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7738959_bin.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_2740_length_43158_cov_9.779392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738556_virus.1073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539702" accession="ERS11141669">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141669</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539702</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.3134</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02349942002795121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.0205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3134_length_38896_cov_28.214225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3134_length_38896_cov_28.214225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539703" accession="ERS11141670">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141670</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539703</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.3398</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.99232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3398_length_36716_cov_4.340703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3398_length_36716_cov_4.340703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539704" accession="ERS11141671">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141671</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539704</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.3631</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3631_length_35130_cov_6.705731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_3631_length_35130_cov_6.705731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0250587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539705" accession="ERS11141672">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141672</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539705</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.4029</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_4029_length_32700_cov_26.499770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_4029_length_32700_cov_26.499770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.4029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539706" accession="ERS11141673">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141673</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539706</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.5241</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.39494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_5241_length_26706_cov_4.469263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_5241_length_26706_cov_4.469263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539707" accession="ERS11141674">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141674</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539707</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.717</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>257.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_717_length_95550_cov_150.277230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_717_length_95550_cov_150.277230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539708" accession="ERS11141675">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141675</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539708</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7738959_virus.9929</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7738959.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7738959) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560159) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_9929_length_15877_cov_9.796853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_D_4_2182__NODE_9929_length_15877_cov_9.796853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738959_virus.9929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539709" accession="ERS11141676">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141676</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539709</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_provirus.1779</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.5774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_1779_length_60881_cov_9.184050_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_1779_length_60881_cov_9.184050_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_provirus.1779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539710" accession="ERS11141677">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141677</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539710</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_provirus.227</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_227_length_197985_cov_19.170094_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7739005_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.942528735632184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_227_length_197985_cov_19.170094_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_provirus.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539711" accession="ERS11141678">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141678</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539711</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_provirus.3188</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_3188_length_37856_cov_6.200005_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_3188_length_37856_cov_6.200005_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp003491505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_provirus.475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539712" accession="ERS11141679">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141679</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539712</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_provirus.479</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_479_length_143343_cov_6.570617_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7739005_bin.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7941176470588235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_479_length_143343_cov_6.570617_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_provirus.479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539713" accession="ERS11141680">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141680</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539713</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_provirus.9</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.4067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_9_length_650101_cov_21.520945_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7739005_bin.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_9_length_650101_cov_21.520945_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539714" accession="ERS11141681">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141681</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539714</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_virus.1754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>102.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_1754_length_61559_cov_58.549624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7739005_bin.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4878048780487805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_1754_length_61559_cov_58.549624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539715" accession="ERS11141682">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141682</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539715</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_virus.2297</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.99235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_2297_length_49535_cov_5.397266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_2297_length_49535_cov_5.397266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_virus.2297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539716" accession="ERS11141683">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141683</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539716</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_virus.3170</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.6822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_3170_length_38074_cov_16.210174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_3170_length_38074_cov_16.210174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_virus.3170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539717" accession="ERS11141684">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141684</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539717</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7739005_virus.5139</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7739005.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7739005) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_5139_length_23865_cov_6.695309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_1_RAJ0018YZ__NODE_5139_length_23865_cov_6.695309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7739005_virus.5139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539718" accession="ERS11141685">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141685</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539718</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_provirus.1079</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06321814991225902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.0724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1079_length_82910_cov_34.934326_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1079_length_82910_cov_34.934326_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_provirus.810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539719" accession="ERS11141686">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141686</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539719</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_provirus.1394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.5674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1394_length_70983_cov_36.010930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7027027027027027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1394_length_70983_cov_36.010930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFFH01;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.1394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539720" accession="ERS11141687">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141687</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539720</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_provirus.2483</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.3854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2483_length_48773_cov_15.898903_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2483_length_48773_cov_15.898903_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745787_bin.221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.2483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539721" accession="ERS11141688">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141688</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539721</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_provirus.4875</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.721800000000002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_4875_length_29672_cov_10.155736_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_4875_length_29672_cov_10.155736_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.1495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539722" accession="ERS11141689">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141689</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539722</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_provirus.94</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>251.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_94_length_237268_cov_151.428461_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745291_bin.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_94_length_237268_cov_151.428461_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737972_virus.307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539723" accession="ERS11141690">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141690</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539723</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.1119</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06551074844800975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1119_length_81190_cov_7.479898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1119_length_81190_cov_7.479898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738169_virus.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539724" accession="ERS11141691">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141691</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539724</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.1606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>112.732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1606_length_64900_cov_68.710581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_1606_length_64900_cov_68.710581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539725" accession="ERS11141692">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141692</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539725</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.2351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.68811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2351_length_50603_cov_5.203796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2351_length_50603_cov_5.203796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.2351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539726" accession="ERS11141693">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141693</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539726</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.2731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>108.664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2731_length_45627_cov_65.351745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745291_bin.276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_2731_length_45627_cov_65.351745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539727" accession="ERS11141694">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141694</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539727</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.3069</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>4.9472295514511e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:53Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.5104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3069_length_42127_cov_41.551201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3069_length_42127_cov_41.551201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539728" accession="ERS11141695">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141695</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539728</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.3243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3243_length_40627_cov_9.303329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7575757575757576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3243_length_40627_cov_9.303329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_provirus.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539729" accession="ERS11141696">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141696</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539729</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.3578</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>168.487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3578_length_37775_cov_108.799326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3578_length_37775_cov_108.799326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.3578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539730" accession="ERS11141697">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141697</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539730</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.3948</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3948_length_35066_cov_12.132442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745291_bin.435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_3948_length_35066_cov_12.132442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.3948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539731" accession="ERS11141698">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141698</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539731</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.4575</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_4575_length_31213_cov_9.200186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_4575_length_31213_cov_9.200186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME006546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;s__CAG-605 sp000433255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539732" accession="ERS11141699">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141699</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539732</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.0914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_542_length_119110_cov_35.303328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_542_length_119110_cov_35.303328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539733" accession="ERS11141700">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141700</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539733</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.6810</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1437523586468131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:18Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_6810_length_22929_cov_13.112813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_6810_length_22929_cov_13.112813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.6810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539734" accession="ERS11141701">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141701</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539734</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745291_virus.82</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745291.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745291) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559329) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_82_length_256725_cov_12.856936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_11_2507__NODE_82_length_256725_cov_12.856936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738623_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539735" accession="ERS11141702">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141702</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539735</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_provirus.1014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.8607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_1014_length_43158_cov_11.441006_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_1014_length_43158_cov_11.441006_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745990_bin.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746738_virus.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539736" accession="ERS11141703">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141703</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539736</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_provirus.2751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15446967015660706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_2751_length_18422_cov_10.351213_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_2751_length_18422_cov_10.351213_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738582_virus.3646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539737" accession="ERS11141704">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141704</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539737</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_provirus.690</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_690_length_57046_cov_7.944970_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_690_length_57046_cov_7.944970_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-273;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_provirus.690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539738" accession="ERS11141705">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141705</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539738</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_virus.1258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.8379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_1258_length_36100_cov_28.126058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745329_bin.110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_1258_length_36100_cov_28.126058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME273237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-1000;s__CAG-1000 sp000434555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.2705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539739" accession="ERS11141706">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141706</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539739</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_virus.19518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09923314058977054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.19282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_19518_length_3438_cov_4.128236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_19518_length_3438_cov_4.128236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_virus.19518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539740" accession="ERS11141707">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141707</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539740</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_virus.549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.9786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_549_length_66134_cov_18.655358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_549_length_66134_cov_18.655358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_virus.549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539741" accession="ERS11141708">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141708</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539741</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745329_virus.951</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7745329.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0286756314816957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.78533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745329) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559348) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_951_length_45167_cov_5.368286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7804878048780488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_24_2613__NODE_951_length_45167_cov_5.368286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738609_virus.1545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539742" accession="ERS11141709">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141709</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539742</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_provirus.1544</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_1544_length_67493_cov_6.755444_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745335_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_1544_length_67493_cov_6.755444_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.3224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539743" accession="ERS11141710">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141710</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539743</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_provirus.2103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2103_length_56213_cov_7.376870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2103_length_56213_cov_7.376870_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_provirus.2103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539744" accession="ERS11141711">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141711</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539744</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_provirus.3</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3_length_665699_cov_15.034449_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745335_bin.290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7258064516129032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3_length_665699_cov_15.034449_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_provirus.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539745" accession="ERS11141712">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141712</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539745</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_provirus.428</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>334.519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_428_length_133685_cov_195.891361_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745335_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_428_length_133685_cov_195.891361_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_provirus.428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539746" accession="ERS11141713">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141713</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539746</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_provirus.871</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.8359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_871_length_92958_cov_55.636556_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745335_bin.518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_871_length_92958_cov_55.636556_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.1678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539747" accession="ERS11141714">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141714</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539747</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.12396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_12396_length_15277_cov_8.137763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_12396_length_15277_cov_8.137763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.12396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539748" accession="ERS11141715">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141715</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539748</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.1821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacillus virus BCD7 virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.84211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_1821_length_61376_cov_4.260983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.23809523809523808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_1821_length_61376_cov_4.260983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_virus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Becedseptimavirus; Bacillus virus BCD7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539749" accession="ERS11141716">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141716</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539749</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.2182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.6338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2182_length_54955_cov_36.284486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2182_length_54955_cov_36.284486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME099131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000435075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539750" accession="ERS11141717">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141717</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539750</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.2660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.0651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2660_length_48701_cov_29.626954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_2660_length_48701_cov_29.626954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539751" accession="ERS11141718">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141718</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539751</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.3007</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.4347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3007_length_44886_cov_14.283782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6388888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3007_length_44886_cov_14.283782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.3007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539752" accession="ERS11141719">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141719</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539752</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.3388</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1217791767270203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3388_length_41371_cov_6.072190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4090909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3388_length_41371_cov_6.072190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.3388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539753" accession="ERS11141720">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141720</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539753</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.3630</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.9741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3630_length_39655_cov_48.635707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3630_length_39655_cov_48.635707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.3630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539754" accession="ERS11141721">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141721</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539754</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.3862</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05195538818076477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.7965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3862_length_38084_cov_22.285974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_3862_length_38084_cov_22.285974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_provirus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539755" accession="ERS11141722">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141722</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539755</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.4095</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Campylobacter phage DA10 virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_4095_length_36632_cov_9.690904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_4095_length_36632_cov_9.690904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745335_bin.520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Campylobacterota;c__Campylobacteria;o__Campylobacterales;f__Campylobacteraceae;g__Campylobacter_D;s__Campylobacter_D sp900539255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.4095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Campylobacter phage DA10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539756" accession="ERS11141723">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141723</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539756</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.4506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>482.704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_4506_length_34321_cov_279.269653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_4506_length_34321_cov_279.269653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.1216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539757" accession="ERS11141724">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141724</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539757</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.5242</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_5242_length_30478_cov_5.799382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_5242_length_30478_cov_5.799382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.1858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539758" accession="ERS11141725">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141725</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539758</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.6393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.92516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_6393_length_26152_cov_5.743739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_6393_length_26152_cov_5.743739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.6393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539759" accession="ERS11141726">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141726</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539759</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.7384</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_7384_length_23145_cov_10.406581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745335_bin.358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_7384_length_23145_cov_10.406581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.7384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539760" accession="ERS11141727">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141727</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539760</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.8273</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2954877441377306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.82197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_8273_length_21208_cov_5.038900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_8273_length_21208_cov_5.038900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.8273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539761" accession="ERS11141728">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141728</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539761</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745335_virus.9968</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745335.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.80364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745335) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561292) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_9968_length_18219_cov_4.071988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_16_1621__NODE_9968_length_18219_cov_4.071988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.9968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539762" accession="ERS11141729">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141729</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539762</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_provirus.1694</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:19Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>160.506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_1694_length_71105_cov_97.178507_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745346_bin.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_1694_length_71105_cov_97.178507_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Rikenellaceae;g__Alistipes;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746009_virus.831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539763" accession="ERS11141730">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141730</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539763</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_provirus.2093</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02812088060660004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>114.641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2093_length_59843_cov_70.571412_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745346_bin.277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2093_length_59843_cov_70.571412_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_provirus.2093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539764" accession="ERS11141731">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141731</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539764</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_provirus.3208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3208_length_40261_cov_8.386696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745346_bin.274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3208_length_40261_cov_8.386696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_provirus.3208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539765" accession="ERS11141732">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141732</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539765</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_provirus.533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_533_length_152189_cov_54.213915_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745346_bin.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5272727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_533_length_152189_cov_54.213915_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_provirus.533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539766" accession="ERS11141733">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141733</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539766</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_provirus.891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_891_length_112347_cov_9.466483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_891_length_112347_cov_9.466483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME007797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__CAG-115 sp003531585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_provirus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539767" accession="ERS11141734">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141734</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539767</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.3932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_157_length_256542_cov_20.406547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8028169014084507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_157_length_256542_cov_20.406547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Intestinibacter;s__Intestinibacter bartlettii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539768" accession="ERS11141735">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141735</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539768</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.2130</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Clostridium phage phiCp-D virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.2344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2130_length_59108_cov_37.312920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2130_length_59108_cov_37.312920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P perfringens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.2130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Clostridium phage phiCp-D</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539769" accession="ERS11141736">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141736</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539769</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.2624</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2624_length_49133_cov_10.481674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2624_length_49133_cov_10.481674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.2624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539770" accession="ERS11141737">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141737</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539770</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.2977</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2977_length_43242_cov_18.481293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_2977_length_43242_cov_18.481293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME007974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp003516765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.2977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539771" accession="ERS11141738">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141738</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539771</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.3369</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>552.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3369_length_38317_cov_335.454890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3369_length_38317_cov_335.454890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.3369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539772" accession="ERS11141739">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141739</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539772</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.3656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.2902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3656_length_35188_cov_27.066646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_3656_length_35188_cov_27.066646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0352455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539773" accession="ERS11141740">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141740</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539773</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.4562</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_4562_length_28113_cov_7.988657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_4562_length_28113_cov_7.988657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.4562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539774" accession="ERS11141741">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141741</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539774</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.5338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.52423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_5338_length_23674_cov_5.556342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_5338_length_23674_cov_5.556342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0315825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539775" accession="ERS11141742">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141742</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539775</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745346_virus.5986</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745346.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745346) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567276) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_5986_length_20899_cov_8.378398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_1_RAJ1015YZ__NODE_5986_length_20899_cov_8.378398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.5986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539776" accession="ERS11141743">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141743</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539776</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745362_provirus.1100</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745362.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05049472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745362) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_1100_length_25032_cov_8.054057_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745362_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_1100_length_25032_cov_8.054057_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;s__Catenibacterium sp900764725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_provirus.657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539777" accession="ERS11141744">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141744</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539777</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745362_provirus.623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745362.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01745594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745362) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_623_length_34343_cov_19.817312_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_623_length_34343_cov_19.817312_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745362_provirus.623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539778" accession="ERS11141745">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141745</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539778</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745362_virus.260</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745362.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.8845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745362) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_260_length_53390_cov_69.173710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_260_length_53390_cov_69.173710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539779" accession="ERS11141746">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141746</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539779</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745362_virus.71</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745362.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.7148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745362) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561197) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_71_length_95473_cov_41.683310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_24_1627__NODE_71_length_95473_cov_41.683310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_virus.203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539780" accession="ERS11141747">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141747</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539780</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745366_provirus.31</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745366.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.1583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745366) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559737) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_31_length_151153_cov_58.930121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_31_length_151153_cov_58.930121_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UBA3631;s__UBA3631 sp900546275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_provirus.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539781" accession="ERS11141748">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141748</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539781</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745366_virus.1661</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745366.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.21277445863796457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.75375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745366) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559737) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_1661_length_17770_cov_4.506415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_1661_length_17770_cov_4.506415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738563_provirus.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539782" accession="ERS11141749">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141749</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539782</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745366_virus.3605</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745366.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.4605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745366) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559737) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_3605_length_10308_cov_56.165575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_3605_length_10308_cov_56.165575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738513_bin.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__Zag1;s__Zag1 sp001765415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_provirus.1534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539783" accession="ERS11141750">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141750</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539783</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745366_virus.56</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745366.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745366) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559737) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_56_length_118808_cov_14.492997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9305555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_8_2346__NODE_56_length_118808_cov_14.492997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_virus.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539784" accession="ERS11141751">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141751</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539784</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745371_provirus.341</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745371.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745371) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559796) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_341_length_63841_cov_7.724170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745371_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_341_length_63841_cov_7.724170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738592_provirus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539785" accession="ERS11141752">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141752</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539785</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745371_virus.1142</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745371.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15566732807002084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.38423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745371) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559796) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_1142_length_31545_cov_5.025518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_1142_length_31545_cov_5.025518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738211_virus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539786" accession="ERS11141753">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141753</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539786</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745371_virus.23</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745371.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745371) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559796) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_23_length_189803_cov_10.482538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_23_length_189803_cov_10.482538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539787" accession="ERS11141754">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141754</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539787</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745371_virus.580</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745371.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.1299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745371) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559796) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_580_length_47468_cov_24.333165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_19_2444__NODE_580_length_47468_cov_24.333165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539788" accession="ERS11141755">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141755</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539788</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745376_provirus.124</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Moineauvirus provirus assembled from ERR7745376.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745376) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560592) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_17_1704__NODE_124_length_58323_cov_9.946331_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745376_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_17_1704__NODE_124_length_58323_cov_9.946331_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745376_provirus.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Moineauvirus; unclassified Moineauvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539789" accession="ERS11141756">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141756</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539789</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745376_virus.312</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745376.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02241189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>107.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745376) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560592) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_17_1704__NODE_312_length_25000_cov_67.698271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_17_1704__NODE_312_length_25000_cov_67.698271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737955_virus.323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539790" accession="ERS11141757">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141757</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539790</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_provirus.151</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06389767615329436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>229.852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_151_length_127848_cov_174.697341_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_151_length_127848_cov_174.697341_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.1424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539791" accession="ERS11141758">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141758</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539791</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_provirus.421</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.70817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_421_length_75450_cov_5.529818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_421_length_75450_cov_5.529818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745378_provirus.421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539792" accession="ERS11141759">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141759</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539792</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_virus.1063</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.9998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_1063_length_42116_cov_23.460358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_1063_length_42116_cov_23.460358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0331678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539793" accession="ERS11141760">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141760</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539793</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_virus.1937</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.8649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_1937_length_26694_cov_25.195401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_1937_length_26694_cov_25.195401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_provirus.1712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539794" accession="ERS11141761">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141761</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539794</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_virus.243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:52Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>108.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_243_length_103501_cov_60.660098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9387755102040816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_243_length_103501_cov_60.660098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539795" accession="ERS11141762">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141762</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539795</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_virus.3260</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Picovirinae virus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.6803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_3260_length_17600_cov_49.823318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_3260_length_17600_cov_49.823318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Bariatricus;s__Bariatricus comes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745378_virus.3260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539796" accession="ERS11141763">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141763</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539796</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745378_virus.725</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7745378.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745378) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560154) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_725_length_54691_cov_8.786740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_2_2205__NODE_725_length_54691_cov_8.786740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539797" accession="ERS11141764">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141764</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539797</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_provirus.42</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.3155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_42_length_214291_cov_23.168719_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745382_bin.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_42_length_214291_cov_23.168719_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539798" accession="ERS11141765">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141765</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539798</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_virus.1097</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_1097_length_38550_cov_14.066878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745382_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_1097_length_38550_cov_14.066878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738248_provirus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539799" accession="ERS11141766">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141766</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539799</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_virus.1416</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.14455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_1416_length_32152_cov_5.232175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_1416_length_32152_cov_5.232175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745382_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539800" accession="ERS11141767">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141767</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539800</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_virus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.4088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_217_length_101909_cov_50.937102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.989010989010989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_217_length_101909_cov_50.937102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539801" accession="ERS11141768">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141768</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539801</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_virus.549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05290652265258985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.57305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_549_length_60019_cov_5.518501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_549_length_60019_cov_5.518501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738151_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter sp900548765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_provirus.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539802" accession="ERS11141769">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141769</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539802</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745382_virus.921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745382.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.1394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745382) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_921_length_43410_cov_57.411188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_E_21_2379__NODE_921_length_43410_cov_57.411188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_provirus.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539803" accession="ERS11141770">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141770</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539803</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_provirus.313</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_313_length_81978_cov_12.157617_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_313_length_81978_cov_12.157617_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738217_virus.1221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539804" accession="ERS11141771">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141771</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539804</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_virus.1014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.5152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_1014_length_39248_cov_27.932807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_1014_length_39248_cov_27.932807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746779_virus.1100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539805" accession="ERS11141772">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141772</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539805</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_virus.1447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_1447_length_31265_cov_8.853085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_1447_length_31265_cov_8.853085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539806" accession="ERS11141773">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141773</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539806</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_virus.2314</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00270137614315428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.94495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_2314_length_21840_cov_4.344530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_2314_length_21840_cov_4.344530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539807" accession="ERS11141774">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141774</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539807</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_virus.44</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_44_length_198446_cov_10.709733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745383_bin.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5151515151515151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_44_length_198446_cov_10.709733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539808" accession="ERS11141775">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141775</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539808</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745383_virus.847</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745383.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.52246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745383) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560118) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_847_length_43876_cov_5.381333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_B_22_2177__NODE_847_length_43876_cov_5.381333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539809" accession="ERS11141776">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141776</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539809</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_provirus.2087</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. provirus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.3657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_2087_length_26347_cov_24.081043_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_2087_length_26347_cov_24.081043_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_provirus.3468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539810" accession="ERS11141777">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141777</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539810</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_provirus.382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05270140052539582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>206.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_382_length_81574_cov_119.143183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_382_length_81574_cov_119.143183_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.1422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539811" accession="ERS11141778">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141778</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539811</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_provirus.970</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_970_length_46831_cov_6.797237_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_970_length_46831_cov_6.797237_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_virus.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539812" accession="ERS11141779">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141779</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539812</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_virus.1644</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0630538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_1644_length_32133_cov_6.045202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_1644_length_32133_cov_6.045202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.2233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539813" accession="ERS11141780">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141780</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539813</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_virus.2367</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.81773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_2367_length_23593_cov_5.422904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_2367_length_23593_cov_5.422904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539814" accession="ERS11141781">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141781</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539814</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_virus.4197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_4197_length_13110_cov_9.986881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_4197_length_13110_cov_9.986881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A sp900066145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.4730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539815" accession="ERS11141782">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141782</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539815</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745384_virus.695</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745384.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745384) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560192) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_695_length_57932_cov_7.082620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_20_2245__NODE_695_length_57932_cov_7.082620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539816" accession="ERS11141783">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141783</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539816</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_provirus.1571</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.1087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_1571_length_25450_cov_13.976156_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_1571_length_25450_cov_13.976156_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_provirus.1571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539817" accession="ERS11141784">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141784</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539817</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_provirus.6</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_6_length_298064_cov_7.518694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745386_bin.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_6_length_298064_cov_7.518694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME150720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp003526955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_provirus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539818" accession="ERS11141785">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141785</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539818</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.1157</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05495594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_1157_length_31753_cov_19.484878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_1157_length_31753_cov_19.484878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_virus.1157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539819" accession="ERS11141786">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141786</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539819</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_161_length_97109_cov_12.672067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_161_length_97109_cov_12.672067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738250_virus.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539820" accession="ERS11141787">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141787</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539820</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.29</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_29_length_180563_cov_15.669963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_29_length_180563_cov_15.669963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Romboutsia;s__Romboutsia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539821" accession="ERS11141788">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141788</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539821</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.582</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02177917672702028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_582_length_50478_cov_25.513264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_582_length_50478_cov_25.513264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_virus.582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539822" accession="ERS11141789">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141789</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539822</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.818</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_818_length_39965_cov_10.744008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_818_length_39965_cov_10.744008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_virus.818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539823" accession="ERS11141790">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141790</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539823</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745386_virus.993</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745386.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.2296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745386) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560401) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_993_length_35362_cov_28.851608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_10_1818__NODE_993_length_35362_cov_28.851608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745386_virus.993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539824" accession="ERS11141791">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141791</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539824</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_provirus.579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_579_length_65713_cov_13.651015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_579_length_65713_cov_13.651015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_provirus.467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539825" accession="ERS11141792">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141792</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539825</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_virus.1083</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>98.9933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_1083_length_44385_cov_61.043942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4411764705882353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_1083_length_44385_cov_61.043942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME090929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__CAG-274;g__UMGS1441;s__UMGS1441 sp900551755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745388_virus.1083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539826" accession="ERS11141793">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141793</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539826</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_virus.1305</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_1305_length_39422_cov_10.289922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_1305_length_39422_cov_10.289922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745388_virus.1305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539827" accession="ERS11141794">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141794</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539827</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_virus.2288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.93148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_2288_length_25163_cov_4.859802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_2288_length_25163_cov_4.859802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539828" accession="ERS11141795">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141795</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539828</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_virus.4576</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.89549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_4576_length_14216_cov_5.019238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_4576_length_14216_cov_5.019238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738205_virus.2854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539829" accession="ERS11141796">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141796</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539829</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745388_virus.863</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7745388.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745388) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559653) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_863_length_51376_cov_10.409131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5813953488372093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_6_2333__NODE_863_length_51376_cov_10.409131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539830" accession="ERS11141797">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141797</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539830</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_provirus.17</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_17_length_264379_cov_55.568456_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_17_length_264379_cov_55.568456_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_provirus.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539831" accession="ERS11141798">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141798</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539831</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_provirus.385</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.71892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_385_length_75186_cov_4.818584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745389_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_385_length_75186_cov_4.818584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_provirus.385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539832" accession="ERS11141799">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141799</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539832</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_virus.1233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_1233_length_38054_cov_18.335124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745389_bin.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_1233_length_38054_cov_18.335124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_virus.812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539833" accession="ERS11141800">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141800</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539833</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_virus.1885</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_1885_length_27065_cov_10.432896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_1885_length_27065_cov_10.432896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539834" accession="ERS11141801">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141801</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539834</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_virus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>197.858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_275_length_88846_cov_226.136895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5853658536585366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_275_length_88846_cov_226.136895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539835" accession="ERS11141802">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141802</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539835</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_virus.506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_506_length_66205_cov_10.464055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745389_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_506_length_66205_cov_10.464055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738172_virus.230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539836" accession="ERS11141803">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141803</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539836</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745389_virus.9126</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gokushovirus WZ-2015a virus assembled from ERR7745389.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1051009084400288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.48467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745389) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560206) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_9126_length_5337_cov_4.328897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_2_2257__NODE_9126_length_5337_cov_4.328897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.10210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; unclassified Gokushovirinae; Gokushovirus WZ-2015a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539837" accession="ERS11141804">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141804</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539837</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_provirus.288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1733640890942851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.97348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_288_length_84873_cov_5.213442_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745397_bin.133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_288_length_84873_cov_5.213442_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745388_virus.661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539838" accession="ERS11141805">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141805</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539838</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_provirus.672</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_672_length_50832_cov_8.575884_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745397_bin.218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_672_length_50832_cov_8.575884_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_virus.2248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539839" accession="ERS11141806">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141806</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539839</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.1197</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_1197_length_36068_cov_6.459309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_1197_length_36068_cov_6.459309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_virus.1684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539840" accession="ERS11141807">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141807</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539840</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_144_length_119613_cov_21.539143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.927536231884058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_144_length_119613_cov_21.539143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539841" accession="ERS11141808">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141808</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539841</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.201</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_201_length_101303_cov_7.233813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_201_length_101303_cov_7.233813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539842" accession="ERS11141809">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141809</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539842</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.2621</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.2407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_2621_length_21562_cov_36.857529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_2621_length_21562_cov_36.857529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.3505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539843" accession="ERS11141810">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141810</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539843</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.34</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_34_length_276192_cov_8.204842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7424242424242424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_34_length_276192_cov_8.204842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745397_virus.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539844" accession="ERS11141811">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141811</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539844</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745397_virus.738</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745397.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.4264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745397) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561073) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_738_length_47978_cov_17.213002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_23_1576__NODE_738_length_47978_cov_17.213002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738610_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745397_virus.738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539845" accession="ERS11141812">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141812</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539845</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_provirus.1173</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>156.424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_1173_length_38106_cov_94.518683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745398_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_1173_length_38106_cov_94.518683_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.1626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539846" accession="ERS11141813">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141813</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539846</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_provirus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.3567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_272_length_100107_cov_45.103159_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745398_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_272_length_100107_cov_45.103159_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0346320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539847" accession="ERS11141814">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141814</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539847</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_provirus.935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.21216960352422912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_935_length_45990_cov_6.270533_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_935_length_45990_cov_6.270533_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746365_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_provirus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539848" accession="ERS11141815">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141815</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539848</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_virus.1422</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_1422_length_32882_cov_6.333791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_1422_length_32882_cov_6.333791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME272760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746436_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539849" accession="ERS11141816">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141816</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539849</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_virus.2777</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_2777_length_18575_cov_10.825549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_2777_length_18575_cov_10.825549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_virus.2777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539850" accession="ERS11141817">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141817</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539850</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_virus.4168</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01996515875021345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_4168_length_13195_cov_13.356990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745398_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_4168_length_13195_cov_13.356990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.5711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539851" accession="ERS11141818">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141818</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539851</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745398_virus.8958</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.63969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560823) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_8958_length_6625_cov_3.841326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_11_1531__NODE_8958_length_6625_cov_3.841326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_provirus.13346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539852" accession="ERS11141819">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141819</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539852</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_provirus.281</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.15998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_281_length_101844_cov_5.123124_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745402_bin.163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_281_length_101844_cov_5.123124_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D sp000434695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539853" accession="ERS11141820">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141820</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539853</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_provirus.737</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_737_length_60182_cov_12.501206_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9722222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_737_length_60182_cov_12.501206_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.1309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539854" accession="ERS11141821">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141821</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539854</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_virus.1211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_1211_length_42615_cov_12.986459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_1211_length_42615_cov_12.986459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745410_virus.397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539855" accession="ERS11141822">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141822</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539855</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_virus.1598</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_1598_length_34421_cov_6.082926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_1598_length_34421_cov_6.082926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539856" accession="ERS11141823">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141823</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539856</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_virus.2710</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07823510284733451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_2710_length_21583_cov_6.482470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_2710_length_21583_cov_6.482470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.2180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539857" accession="ERS11141824">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141824</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539857</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745402_virus.527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7745402.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06303277487972782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745402) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559900) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_527_length_73968_cov_22.818462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_1_2491__NODE_527_length_73968_cov_22.818462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738189_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539858" accession="ERS11141825">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141825</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539858</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745403_provirus.114</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745403.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.6361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745403) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559773) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_114_length_162122_cov_29.136407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745403_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_114_length_162122_cov_29.136407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738938_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;s__COE1 sp001916965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_provirus.101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539859" accession="ERS11141826">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141826</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539859</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745403_provirus.913</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7745403.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:51Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.6256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745403) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559773) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_913_length_42775_cov_24.940630_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_913_length_42775_cov_24.940630_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME209802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium sp900539885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745403_provirus.913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539860" accession="ERS11141827">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141827</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539860</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745403_virus.153</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745403.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.0183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745403) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559773) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_153_length_138150_cov_13.540417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_153_length_138150_cov_13.540417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745403_virus.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539861" accession="ERS11141828">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141828</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539861</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745403_virus.3991</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7745403.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20772215993452803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745403) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559773) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_3991_length_14210_cov_11.904691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745403_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_3991_length_14210_cov_11.904691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_virus.3257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539862" accession="ERS11141829">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141829</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539862</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745403_virus.656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7745403.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.2069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745403) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559773) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_656_length_54304_cov_28.100780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7457627118644068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_P_20_2369__NODE_656_length_54304_cov_28.100780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539863" accession="ERS11141830">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141830</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539863</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_provirus.348</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14888717346443942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_348_length_87485_cov_5.963825_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745408_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_348_length_87485_cov_5.963825_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539864" accession="ERS11141831">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141831</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539864</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_provirus.821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>143.846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_821_length_52563_cov_81.879053_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_821_length_52563_cov_81.879053_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539865" accession="ERS11141832">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141832</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539865</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_virus.1130</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.4711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_1130_length_41312_cov_41.073966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_1130_length_41312_cov_41.073966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_provirus.745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539866" accession="ERS11141833">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141833</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539866</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_virus.1474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_1474_length_34170_cov_7.503094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745408_bin.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_1474_length_34170_cov_7.503094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_provirus.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539867" accession="ERS11141834">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141834</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539867</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_virus.2512</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.91259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_2512_length_23317_cov_5.623795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_2512_length_23317_cov_5.623795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.2258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539868" accession="ERS11141835">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141835</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539868</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_virus.579</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.2562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_579_length_64458_cov_26.171262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_579_length_64458_cov_26.171262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539869" accession="ERS11141836">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141836</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539869</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745408_virus.832</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745408.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.4818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745408) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561111) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_832_length_52172_cov_16.351531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_17_1592__NODE_832_length_52172_cov_16.351531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539870" accession="ERS11141837">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141837</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539870</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_provirus.146</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.6446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_146_length_143571_cov_22.715452_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745410_bin.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_146_length_143571_cov_22.715452_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.1119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539871" accession="ERS11141838">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141838</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539871</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_provirus.73</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.3515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_73_length_189672_cov_14.728015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745410_bin.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48148148148148145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_73_length_189672_cov_14.728015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539872" accession="ERS11141839">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141839</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539872</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_virus.1183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.7406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_1183_length_41822_cov_16.522554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_1183_length_41822_cov_16.522554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp900552845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745410_virus.1183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539873" accession="ERS11141840">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141840</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539873</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_virus.1847</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.96317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_1847_length_29501_cov_4.344175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_1847_length_29501_cov_4.344175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__MARSEILLE-P3954;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.1568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539874" accession="ERS11141841">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141841</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539874</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_virus.396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.0106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_396_length_85712_cov_24.914953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_396_length_85712_cov_24.914953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745867_virus.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539875" accession="ERS11141842">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141842</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539875</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745410_virus.803</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745410.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745410) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559957) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_803_length_54712_cov_6.785449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_7_2493__NODE_803_length_54712_cov_6.785449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539876" accession="ERS11141843">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141843</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539876</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745411_virus.1042</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_1042_length_38541_cov_10.718724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745411_bin.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_1042_length_38541_cov_10.718724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745433_virus.963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539877" accession="ERS11141844">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141844</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539877</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745411_virus.2016</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.92801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_2016_length_21791_cov_5.260017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_2016_length_21791_cov_5.260017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745411_virus.2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539878" accession="ERS11141845">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141845</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539878</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745411_virus.3610</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_3610_length_12909_cov_8.214931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_3610_length_12909_cov_8.214931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745411_virus.3610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539879" accession="ERS11141846">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141846</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539879</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745411_virus.76</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>154.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566898) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_76_length_181224_cov_94.207406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6216216216216216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_23_1541__NODE_76_length_181224_cov_94.207406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_virus.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539880" accession="ERS11141847">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141847</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539880</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745412_provirus.101</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745412.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.4083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745412) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559636) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_101_length_170703_cov_35.394787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745412_bin.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_101_length_170703_cov_35.394787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738938_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;s__COE1 sp001916965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_provirus.101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539881" accession="ERS11141848">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141848</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539881</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745412_provirus.593</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745412.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.5274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745412) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559636) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_593_length_60110_cov_55.717872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745412_bin.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_593_length_60110_cov_55.717872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745412_provirus.593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539882" accession="ERS11141849">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141849</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539882</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745412_virus.1232</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745412.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.9737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745412) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559636) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_1232_length_35444_cov_49.790794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_1232_length_35444_cov_49.790794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME134541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_C;s__Ruminococcus_C sp000437255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_virus.1437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539883" accession="ERS11141850">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141850</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539883</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745412_virus.2519</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745412.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00495594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.38668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745412) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559636) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_2519_length_20490_cov_4.700926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_2519_length_20490_cov_4.700926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0245806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539884" accession="ERS11141851">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141851</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539884</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745412_virus.597</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745412.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6274779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.7394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745412) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559636) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_597_length_59632_cov_25.840803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_4_2332__NODE_597_length_59632_cov_25.840803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738166_provirus.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539885" accession="ERS11141852">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141852</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539885</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_provirus.1314</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1314_length_37009_cov_7.697633_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745414_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1314_length_37009_cov_7.697633_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737956_provirus.772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539886" accession="ERS11141853">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141853</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539886</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_provirus.488</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_488_length_65892_cov_15.532690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.27450980392156865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_488_length_65892_cov_15.532690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539887" accession="ERS11141854">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141854</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539887</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_virus.1037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.4333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1037_length_42378_cov_16.124914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1037_length_42378_cov_16.124914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3571512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539888" accession="ERS11141855">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141855</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539888</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_virus.1381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.0735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1381_length_35668_cov_43.874435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_1381_length_35668_cov_43.874435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.1280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539889" accession="ERS11141856">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141856</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539889</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_virus.2019</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.0627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_2019_length_28135_cov_17.481360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_2019_length_28135_cov_17.481360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.3010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539890" accession="ERS11141857">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141857</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539890</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_virus.50498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>4.079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_50498_length_2264_cov_2.257888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_50498_length_2264_cov_2.257888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738664_bin.300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter sp900543445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-3219806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539891" accession="ERS11141858">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141858</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539891</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745414_virus.9555</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745414.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.6366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745414) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560961) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_9555_length_8348_cov_8.284972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_5_1538__NODE_9555_length_8348_cov_8.284972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_provirus.5185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539892" accession="ERS11141859">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141859</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539892</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_provirus.17</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09955947136563878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_17_length_292599_cov_7.583300_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_17_length_292599_cov_7.583300_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_provirus.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539893" accession="ERS11141860">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141860</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539893</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_provirus.756</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_756_length_56750_cov_14.417677_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_756_length_56750_cov_14.417677_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539894" accession="ERS11141861">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141861</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539894</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_virus.1250</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.2352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_1250_length_41473_cov_36.291115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_1250_length_41473_cov_36.291115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0331678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539895" accession="ERS11141862">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141862</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539895</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_virus.1708</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.09168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_1708_length_33179_cov_4.010664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_1708_length_33179_cov_4.010664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539896" accession="ERS11141863">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141863</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539896</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_virus.3282</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.97832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_3282_length_18786_cov_5.870918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745419_bin.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_3282_length_18786_cov_5.870918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746398_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Oribacterium;s__Oribacterium sp900772695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539897" accession="ERS11141864">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141864</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539897</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_virus.598</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_598_length_64034_cov_6.731460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_598_length_64034_cov_6.731460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738616_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4334;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_virus.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539898" accession="ERS11141865">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141865</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539898</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745419_virus.8808</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745419.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745419) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559960) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_8808_length_7220_cov_5.900182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_5_2504__NODE_8808_length_7220_cov_5.900182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D berlinense</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745419_virus.8808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539899" accession="ERS11141866">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141866</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539899</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745420_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745420.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.1771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745420) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_60_length_123945_cov_24.690873_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_60_length_123945_cov_24.690873_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539900" accession="ERS11141867">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141867</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539900</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745420_virus.1382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745420.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.37825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745420) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_1382_length_24964_cov_3.291879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_1382_length_24964_cov_3.291879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539901" accession="ERS11141868">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141868</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539901</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745420_virus.287</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745420.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.53359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745420) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_287_length_59297_cov_5.532506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_287_length_59297_cov_5.532506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539902" accession="ERS11141869">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141869</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539902</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745420_virus.584</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745420.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.44441133531732857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.3785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745420) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_584_length_40786_cov_40.394434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_584_length_40786_cov_40.394434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539903" accession="ERS11141870">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141870</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539903</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745420_virus.834</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745420.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>197.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745420) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560258) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_834_length_33484_cov_122.607597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_14_2281__NODE_834_length_33484_cov_122.607597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745420_virus.834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539904" accession="ERS11141871">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141871</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539904</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_provirus.22</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_22_length_297370_cov_19.394520_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745423_bin.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_22_length_297370_cov_19.394520_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_provirus.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539905" accession="ERS11141872">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141872</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539905</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_provirus.659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.9777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_659_length_55336_cov_31.017174_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745423_bin.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_659_length_55336_cov_31.017174_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-239;g__RUG410;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_provirus.659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539906" accession="ERS11141873">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141873</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539906</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.1348</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_1348_length_33690_cov_6.757386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7741935483870968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_1348_length_33690_cov_6.757386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_virus.1348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539907" accession="ERS11141874">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141874</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539907</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.1864</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_1864_length_26268_cov_5.910962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_1864_length_26268_cov_5.910962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746233_virus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539908" accession="ERS11141875">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141875</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539908</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.2670</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11996874984972046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_2670_length_20335_cov_6.991164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_2670_length_20335_cov_6.991164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539909" accession="ERS11141876">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141876</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539909</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7699339207048458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>467.991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_552_length_61816_cov_272.213010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_552_length_61816_cov_272.213010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746233_virus.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539910" accession="ERS11141877">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141877</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539910</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.858</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_858_length_46015_cov_10.139884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_858_length_46015_cov_10.139884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.2134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539911" accession="ERS11141878">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141878</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539911</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745423_virus.963</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745423.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.80641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745423) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562234) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_963_length_42369_cov_4.320841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_8_1631__NODE_963_length_42369_cov_4.320841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_virus.963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539912" accession="ERS11141879">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141879</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539912</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_provirus.164</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_164_length_139784_cov_10.543029_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745425_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_164_length_139784_cov_10.543029_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738624_bin.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Alphaproteobacteria;o__RF32;f__CAG-239;g__CAAFZY01;s__CAAFZY01 sp900767645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_provirus.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539913" accession="ERS11141880">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141880</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539913</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_provirus.63</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.7169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_63_length_205746_cov_26.626847_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745425_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_63_length_205746_cov_26.626847_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_provirus.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539914" accession="ERS11141881">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141881</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539914</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.1085</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10082464483052038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.3946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_1085_length_44056_cov_56.907683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_1085_length_44056_cov_56.907683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.1113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539915" accession="ERS11141882">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141882</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539915</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.1572</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_1572_length_33900_cov_5.904562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_1572_length_33900_cov_5.904562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539916" accession="ERS11141883">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141883</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539916</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.2253</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_2253_length_25225_cov_7.905082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_2253_length_25225_cov_7.905082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter;s__Fusicatenibacter saccharivorans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.3584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539917" accession="ERS11141884">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141884</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539917</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.2674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Erysipelothrix phage phi1605 virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.6152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_2674_length_21738_cov_46.522137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_2674_length_21738_cov_46.522137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737969_virus.1469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Erysipelothrix phage phi1605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539918" accession="ERS11141885">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141885</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539918</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.4190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.05087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_4190_length_15129_cov_3.326601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_4190_length_15129_cov_3.326601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900768995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.4190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539919" accession="ERS11141886">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141886</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539919</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745425_virus.757</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745425.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.2428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745425) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561787) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_757_length_57255_cov_24.163052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_12_1112__NODE_757_length_57255_cov_24.163052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539920" accession="ERS11141887">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141887</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539920</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_provirus.105</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>258.919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_105_length_155373_cov_266.154447_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745427_bin.159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4186046511627907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_105_length_155373_cov_266.154447_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745427_provirus.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539921" accession="ERS11141888">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141888</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539921</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_provirus.267</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.2873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_267_length_105624_cov_37.818697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745427_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_267_length_105624_cov_37.818697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539922" accession="ERS11141889">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141889</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539922</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_provirus.719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_719_length_58478_cov_12.425506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_719_length_58478_cov_12.425506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.1355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539923" accession="ERS11141890">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141890</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539923</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_virus.114</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_114_length_150387_cov_8.067574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6206896551724138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_114_length_150387_cov_8.067574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_provirus.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539924" accession="ERS11141891">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141891</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539924</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_virus.1736</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.4339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_1736_length_32699_cov_36.212004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_1736_length_32699_cov_36.212004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539925" accession="ERS11141892">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141892</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539925</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745427_virus.545</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745427.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745427) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560244) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_545_length_69403_cov_10.211551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7741935483870968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_22_2249__NODE_545_length_69403_cov_10.211551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D succinifaciens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745427_virus.545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539926" accession="ERS11141893">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141893</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539926</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_provirus.1322</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_1322_length_27344_cov_12.115818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745429_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_1322_length_27344_cov_12.115818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0091843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539927" accession="ERS11141894">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141894</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539927</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_provirus.280</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.0896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_280_length_79726_cov_18.756846_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_280_length_79726_cov_18.756846_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738195_bin.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738605_provirus.824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539928" accession="ERS11141895">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141895</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539928</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_provirus.52</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01745594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.7367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_52_length_164658_cov_18.504177_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745429_bin.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_52_length_164658_cov_18.504177_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738195_bin.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_provirus.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539929" accession="ERS11141896">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141896</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539929</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_virus.1417</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:50Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_1417_length_25876_cov_11.628009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745429_bin.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9642857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_1417_length_25876_cov_11.628009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745429_bin.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus_G;s__Enterococcus_G italicus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_virus.1417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539930" accession="ERS11141897">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141897</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539930</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_virus.2474</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06798872201629169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_2474_length_17059_cov_8.742551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_2474_length_17059_cov_8.742551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_virus.2474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539931" accession="ERS11141898">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141898</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539931</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745429_virus.709</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745429.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.4908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745429) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561281) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_709_length_42155_cov_22.816959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745429_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_10_1632__NODE_709_length_42155_cov_22.816959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_virus.709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539932" accession="ERS11141899">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141899</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539932</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_provirus.122</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_122_length_149128_cov_7.036075_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745431_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_122_length_149128_cov_7.036075_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539933" accession="ERS11141900">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141900</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539933</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_provirus.411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.0952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_411_length_77864_cov_52.122283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745431_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_411_length_77864_cov_52.122283_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745431_provirus.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539934" accession="ERS11141901">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141901</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539934</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_provirus.98</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19493392070484583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.2715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_98_length_164667_cov_45.223792_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745431_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_98_length_164667_cov_45.223792_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539935" accession="ERS11141902">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141902</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539935</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_virus.1305</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11543966842021684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_1305_length_35069_cov_6.243627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_1305_length_35069_cov_6.243627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746436_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539936" accession="ERS11141903">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141903</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539936</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_virus.255</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12878892628021937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_255_length_98774_cov_10.182640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_255_length_98774_cov_10.182640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539937" accession="ERS11141904">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141904</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539937</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_virus.382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.6192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_382_length_80904_cov_15.618296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_382_length_80904_cov_15.618296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745431_virus.382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539938" accession="ERS11141905">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141905</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539938</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745431_virus.8717</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745431.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.14898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745431) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561105) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_8717_length_6520_cov_5.282011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_6_1569__NODE_8717_length_6520_cov_5.282011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_provirus.13346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539939" accession="ERS11141906">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141906</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539939</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_provirus.19</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_19_length_269136_cov_10.379151_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745432_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_19_length_269136_cov_10.379151_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746741_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__RUG572;s__RUG572 sp900547945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539940" accession="ERS11141907">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141907</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539940</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_provirus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.8374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_80_length_173611_cov_68.043813_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745432_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_80_length_173611_cov_68.043813_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_provirus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539941" accession="ERS11141908">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141908</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539941</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.0818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_118_length_146770_cov_11.243631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7045454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_118_length_146770_cov_11.243631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539942" accession="ERS11141909">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141909</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539942</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.1475</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_1475_length_35495_cov_6.278248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_1475_length_35495_cov_6.278248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738622_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539943" accession="ERS11141910">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141910</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539943</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.1921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_1921_length_29102_cov_9.319518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_1921_length_29102_cov_9.319518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539944" accession="ERS11141911">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141911</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539944</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.2416</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_2416_length_24218_cov_7.835425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745432_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_2416_length_24218_cov_7.835425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0296258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539945" accession="ERS11141912">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141912</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539945</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.4029</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.17577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_4029_length_15528_cov_3.811922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_4029_length_15528_cov_3.811922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.4029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539946" accession="ERS11141913">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141913</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539946</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.614</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>238.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_614_length_60778_cov_136.139652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_614_length_60778_cov_136.139652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539947" accession="ERS11141914">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141914</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539947</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.744</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.0323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_744_length_54621_cov_20.744133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_744_length_54621_cov_20.744133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539948" accession="ERS11141915">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141915</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539948</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745432_virus.845</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745432.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745432) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561552) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_845_length_50672_cov_14.304220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_20_1162__NODE_845_length_50672_cov_14.304220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539949" accession="ERS11141916">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141916</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539949</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_provirus.20</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>363.902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_20_length_312805_cov_213.669006_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745433_bin.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>44.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4827586206896552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_20_length_312805_cov_213.669006_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_provirus.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539950" accession="ERS11141917">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141917</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539950</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_provirus.690</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.29245594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:49Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_690_length_49299_cov_5.989761_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745433_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_690_length_49299_cov_5.989761_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737972_provirus.947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539951" accession="ERS11141918">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141918</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539951</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_virus.1739</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.37216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_1739_length_26488_cov_4.269736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_1739_length_26488_cov_4.269736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745433_virus.1739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539952" accession="ERS11141919">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141919</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539952</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_virus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.2753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_272_length_82574_cov_17.123798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_272_length_82574_cov_17.123798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539953" accession="ERS11141920">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141920</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539953</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_virus.537</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.3717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_537_length_56924_cov_40.835365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_537_length_56924_cov_40.835365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539954" accession="ERS11141921">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141921</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539954</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745433_virus.851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.8302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561097) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_851_length_43278_cov_19.058494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_13_1581__NODE_851_length_43278_cov_19.058494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900547315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.1088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539955" accession="ERS11141922">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141922</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539955</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_provirus.1195</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9569113353173286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.7695900000000009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_1195_length_39142_cov_5.194112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_1195_length_39142_cov_5.194112_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME255258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp003150355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_virus.1367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539956" accession="ERS11141923">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141923</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539956</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_provirus.377</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_377_length_87042_cov_11.525407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745434_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_377_length_87042_cov_11.525407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_provirus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539957" accession="ERS11141924">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141924</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539957</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_provirus.74</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.6124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_74_length_209281_cov_10.203725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745434_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_74_length_209281_cov_10.203725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745434_provirus.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539958" accession="ERS11141925">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141925</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539958</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_virus.1173</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_1173_length_39838_cov_9.418953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_1173_length_39838_cov_9.418953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738188_virus.899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539959" accession="ERS11141926">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141926</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539959</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_virus.2071</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05335306433405912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.6773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_2071_length_24025_cov_23.109070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_2071_length_24025_cov_23.109070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_provirus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539960" accession="ERS11141927">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141927</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539960</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745434_virus.558</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745434.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>109.553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745434) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561092) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_558_length_68126_cov_64.557040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_23_1587__NODE_558_length_68126_cov_64.557040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539961" accession="ERS11141928">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141928</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539961</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_provirus.1399</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1399_length_36161_cov_6.217243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745438_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1399_length_36161_cov_6.217243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_provirus.2577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539962" accession="ERS11141929">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141929</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539962</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_provirus.210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01354854638802992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_210_length_124777_cov_5.820233_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745438_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_210_length_124777_cov_5.820233_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_provirus.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539963" accession="ERS11141930">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141930</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539963</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_provirus.584</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01242110588677733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.5021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_584_length_66522_cov_19.582948_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_584_length_66522_cov_19.582948_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_provirus.512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539964" accession="ERS11141931">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141931</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539964</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_virus.1111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.6385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1111_length_43239_cov_49.464367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1111_length_43239_cov_49.464367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738222_virus.858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539965" accession="ERS11141932">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141932</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539965</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_virus.1350</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.889977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1350_length_37292_cov_7.717614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745438_bin.192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_1350_length_37292_cov_7.717614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539966" accession="ERS11141933">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141933</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539966</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_virus.2711</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.02255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_2711_length_21525_cov_4.294107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_2711_length_21525_cov_4.294107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.2674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539967" accession="ERS11141934">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141934</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539967</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745438_virus.700</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745438.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.6626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745438) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560969) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_700_length_59684_cov_12.484926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_19_1543__NODE_700_length_59684_cov_12.484926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745438_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539968" accession="ERS11141935">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141935</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539968</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745539_provirus.227</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745539.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745539) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_23_1707__NODE_227_length_39100_cov_9.461548_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_23_1707__NODE_227_length_39100_cov_9.461548_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium breve</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738155_virus.592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539969" accession="ERS11141936">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141936</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539969</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_provirus.1128</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.62961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_1128_length_38061_cov_5.908725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_1128_length_38061_cov_5.908725_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745544_provirus.1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539970" accession="ERS11141937">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141937</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539970</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_provirus.409</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.9852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_409_length_70289_cov_10.255170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745544_bin.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_409_length_70289_cov_10.255170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539971" accession="ERS11141938">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141938</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539971</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_virus.1267</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_1267_length_35206_cov_6.187708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_1267_length_35206_cov_6.187708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738208_virus.802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539972" accession="ERS11141939">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141939</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539972</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_virus.189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.39963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_189_length_102080_cov_5.513063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.972972972972973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_189_length_102080_cov_5.513063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738173_virus.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539973" accession="ERS11141940">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141940</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539973</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_virus.550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_550_length_59557_cov_61.100353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.951219512195122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_550_length_59557_cov_61.100353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738208_virus.509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539974" accession="ERS11141941">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141941</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539974</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745544_virus.989</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745544.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745544) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562240) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_989_length_41629_cov_10.696597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_12_1633__NODE_989_length_41629_cov_10.696597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746721_virus.430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539975" accession="ERS11141942">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141942</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539975</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745545_provirus.312</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0380538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559315) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_312_length_75945_cov_6.209140_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_312_length_75945_cov_6.209140_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_provirus.312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539976" accession="ERS11141943">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141943</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539976</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745545_virus.1133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.4168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559315) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_1133_length_36545_cov_34.652106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_1133_length_36545_cov_34.652106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539977" accession="ERS11141944">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141944</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539977</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745545_virus.174</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559315) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_174_length_99330_cov_5.574562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_174_length_99330_cov_5.574562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_virus.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539978" accession="ERS11141945">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141945</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539978</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745545_virus.625</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.8122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559315) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_625_length_53852_cov_35.395686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_625_length_53852_cov_35.395686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539979" accession="ERS11141946">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141946</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539979</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745545_virus.935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745545.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.2906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745545) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559315) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_935_length_41514_cov_31.386346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_14_2608__NODE_935_length_41514_cov_31.386346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_provirus.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539980" accession="ERS11141947">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141947</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539980</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_provirus.421</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_421_length_80825_cov_20.472309_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_421_length_80825_cov_20.472309_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-877;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745556_provirus.421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539981" accession="ERS11141948">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141948</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539981</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_virus.1000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.0233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_1000_length_46667_cov_4.898583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_1000_length_46667_cov_4.898583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746739_virus.544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539982" accession="ERS11141949">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141949</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539982</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_virus.1645</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.5731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_1645_length_32044_cov_18.974599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_1645_length_32044_cov_18.974599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747255_bin.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-884;s__CAG-884 sp000433875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_provirus.973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539983" accession="ERS11141950">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141950</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539983</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_virus.2334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_2334_length_23942_cov_5.564132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_2334_length_23942_cov_5.564132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_provirus.640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539984" accession="ERS11141951">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141951</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539984</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_virus.37</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_37_length_242777_cov_12.084718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_37_length_242777_cov_12.084718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_virus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539985" accession="ERS11141952">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141952</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539985</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745556_virus.875</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745556.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15383948336067438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745556) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560852) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_875_length_51816_cov_6.250565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_9_1537__NODE_875_length_51816_cov_6.250565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745382_bin.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-552;g__UBA10677;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539986" accession="ERS11141953">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141953</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539986</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_provirus.496</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_496_length_60711_cov_13.557872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_496_length_60711_cov_13.557872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0342275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539987" accession="ERS11141954">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141954</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539987</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_provirus.877</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.73814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_877_length_42341_cov_5.037763_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745559_bin.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_877_length_42341_cov_5.037763_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_provirus.766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539988" accession="ERS11141955">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141955</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539988</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_virus.1633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09419107100014808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.98262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_1633_length_27361_cov_5.712615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_1633_length_27361_cov_5.712615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539989" accession="ERS11141956">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141956</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539989</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_virus.240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12878892628021937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.35867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_240_length_98686_cov_5.426533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_240_length_98686_cov_5.426533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539990" accession="ERS11141957">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141957</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539990</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_virus.472</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_472_length_62869_cov_9.792601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_472_length_62869_cov_9.792601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539991" accession="ERS11141958">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141958</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539991</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_virus.836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:48Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.86685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_836_length_43799_cov_4.550089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_836_length_43799_cov_4.550089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539992" accession="ERS11141959">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141959</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539992</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745559_virus.971</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Aeromonas phage phiARM81mr virus assembled from ERR7745559.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6424559471365638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745559) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560875) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_971_length_39664_cov_6.447723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_3_1539__NODE_971_length_39664_cov_6.447723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Aeromonas phage phiARM81mr</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539993" accession="ERS11141960">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141960</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539993</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745561_provirus.38</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>441.265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560188) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_4_2261__NODE_38_length_198664_cov_273.770599_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745561_bin.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_4_2261__NODE_38_length_198664_cov_273.770599_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745561_provirus.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539994" accession="ERS11141961">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141961</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539994</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745561_virus.377</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745561.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05947136563876654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>125.689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745561) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560188) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_4_2261__NODE_377_length_45522_cov_73.412367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9393939393939394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_4_2261__NODE_377_length_45522_cov_73.412367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745561_virus.377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539995" accession="ERS11141962">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141962</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539995</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_provirus.234</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_234_length_100092_cov_9.491446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_234_length_100092_cov_9.491446_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745989_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__RUG806;s__RUG806 sp900313475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745743_provirus.1247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539996" accession="ERS11141963">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141963</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539996</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_provirus.64</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_64_length_181250_cov_45.649352_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745563_bin.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_64_length_181250_cov_45.649352_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539997" accession="ERS11141964">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141964</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539997</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.1068</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>316.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1068_length_41615_cov_187.740503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1068_length_41615_cov_187.740503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539998" accession="ERS11141965">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141965</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539998</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.1239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.30894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1239_length_37384_cov_5.509904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1239_length_37384_cov_5.509904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.1239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13539999" accession="ERS11141966">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141966</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13539999</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.1694</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13539999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1694_length_30667_cov_22.849461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_1694_length_30667_cov_22.849461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-451;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745914_virus.1335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540000" accession="ERS11141967">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141967</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540000</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.2600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03469162995594714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_2600_length_22205_cov_5.575606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_2600_length_22205_cov_5.575606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_provirus.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540001" accession="ERS11141968">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141968</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540001</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.531</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.1462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_531_length_64458_cov_39.235256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_531_length_64458_cov_39.235256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540002" accession="ERS11141969">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141969</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540002</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745563_virus.660</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745563.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.5341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745563) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561109) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_660_length_57376_cov_38.566223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_19_1591__NODE_660_length_57376_cov_38.566223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746465_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;s__UBA3388 sp900546465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540003" accession="ERS11141970">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141970</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540003</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745565_provirus.228</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566845) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_228_length_80087_cov_7.370966_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745565_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_228_length_80087_cov_7.370966_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540004" accession="ERS11141971">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141971</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540004</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745565_provirus.89</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>459.499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566845) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_89_length_121138_cov_272.297676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_89_length_121138_cov_272.297676_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.1407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540005" accession="ERS11141972">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141972</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540005</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745565_virus.1264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15293544543429416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566845) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_1264_length_29451_cov_5.609348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_1264_length_29451_cov_5.609348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_provirus.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540006" accession="ERS11141973">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141973</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540006</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745565_virus.2125</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>160.489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566845) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_2125_length_20102_cov_97.538826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_2125_length_20102_cov_97.538826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745565_virus.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540007" accession="ERS11141974">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141974</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540007</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745565_virus.688</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745565.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745565) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566845) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_688_length_43084_cov_10.496129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_5_1580__NODE_688_length_43084_cov_10.496129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540008" accession="ERS11141975">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141975</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540008</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_provirus.32</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.3595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_32_length_230795_cov_55.054595_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5172413793103449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_32_length_230795_cov_55.054595_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D succinifaciens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_provirus.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540009" accession="ERS11141976">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141976</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540009</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_virus.1079</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_1079_length_39113_cov_10.654191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_1079_length_39113_cov_10.654191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_virus.1079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540010" accession="ERS11141977">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141977</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540010</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_virus.1649</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 virus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0632649586071869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>149.529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_1649_length_28792_cov_91.964548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_1649_length_28792_cov_91.964548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540011" accession="ERS11141978">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141978</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540011</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_virus.211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>557.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_211_length_111014_cov_339.774854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745577_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_211_length_111014_cov_339.774854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745577_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540012" accession="ERS11141979">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141979</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540012</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_virus.342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.7663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_342_length_84356_cov_27.237426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_342_length_84356_cov_27.237426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_virus.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540013" accession="ERS11141980">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141980</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540013</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745577_virus.594</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745577.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745577) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561808) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_594_length_58517_cov_9.879860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745577_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_14_1114__NODE_594_length_58517_cov_9.879860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp003491505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540014" accession="ERS11141981">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141981</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540014</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_provirus.1</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03986784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_1_length_883224_cov_22.191299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745582_bin.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_1_length_883224_cov_22.191299_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0318998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540015" accession="ERS11141982">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141982</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540015</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_provirus.299</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.1825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_299_length_75765_cov_28.399522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745582_bin.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_299_length_75765_cov_28.399522_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540016" accession="ERS11141983">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141983</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540016</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_provirus.781</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_781_length_47388_cov_12.122382_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_781_length_47388_cov_12.122382_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745582_provirus.781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540017" accession="ERS11141984">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141984</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540017</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_virus.126</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_126_length_112161_cov_7.041406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_126_length_112161_cov_7.041406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738210_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540018" accession="ERS11141985">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141985</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540018</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_virus.1727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.1302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_1727_length_30044_cov_39.883105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_1727_length_30044_cov_39.883105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745582_virus.1727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540019" accession="ERS11141986">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141986</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540019</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_virus.2092</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_2092_length_26455_cov_8.358139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745582_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_2092_length_26455_cov_8.358139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745556_provirus.421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540020" accession="ERS11141987">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141987</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540020</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745582_virus.484</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7745582.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745582) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561354) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_484_length_60588_cov_6.955149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_6_1630__NODE_484_length_60588_cov_6.955149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540021" accession="ERS11141988">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141988</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540021</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_provirus.1108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.0915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1108_length_48814_cov_11.445473_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745588_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1108_length_48814_cov_11.445473_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_provirus.1015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540022" accession="ERS11141989">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141989</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540022</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_provirus.41</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.6309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_41_length_241891_cov_50.762015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745588_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_41_length_241891_cov_50.762015_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_provirus.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540023" accession="ERS11141990">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141990</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540023</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_provirus.926</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.4484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_926_length_55036_cov_18.528685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5217391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_926_length_55036_cov_18.528685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540024" accession="ERS11141991">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141991</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540024</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.1236</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1236_length_45752_cov_5.902157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1236_length_45752_cov_5.902157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540025" accession="ERS11141992">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141992</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540025</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.1724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>135.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1724_length_36298_cov_103.276773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_1724_length_36298_cov_103.276773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540026" accession="ERS11141993">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141993</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540026</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.2254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>347.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_2254_length_29487_cov_197.887283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_2254_length_29487_cov_197.887283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.1155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540027" accession="ERS11141994">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141994</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540027</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.307</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_307_length_100560_cov_62.610919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9240506329113924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_307_length_100560_cov_62.610919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540028" accession="ERS11141995">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141995</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540028</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.4069</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Clostridium phage phiCp-B virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_4069_length_17885_cov_6.454290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_4069_length_17885_cov_6.454290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745648_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_virus.4069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Clostridium phage phiCp-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540029" accession="ERS11141996">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141996</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540029</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745588_virus.78</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745588.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745588) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561767) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_78_length_189654_cov_8.213982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_4_1104__NODE_78_length_189654_cov_8.213982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540030" accession="ERS11141997">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141997</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540030</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_provirus.1193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1193_length_47478_cov_6.993186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745594_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9583333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1193_length_47478_cov_6.993186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.2541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540031" accession="ERS11141998">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141998</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540031</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_provirus.201</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.5479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_201_length_155673_cov_15.505154_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745594_bin.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_201_length_155673_cov_15.505154_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Ruminococcus_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_provirus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540032" accession="ERS11141999">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11141999</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540032</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_provirus.44</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_44_length_302629_cov_15.297043_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_44_length_302629_cov_15.297043_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_provirus.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540033" accession="ERS11142000">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142000</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540033</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_provirus.92</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Studiervirinae provirus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.8906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_92_length_213044_cov_20.901642_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_92_length_213044_cov_20.901642_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Klebsiella;s__Klebsiella pneumoniae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_provirus.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Studiervirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540034" accession="ERS11142001">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142001</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540034</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_virus.1462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.8741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1462_length_40886_cov_18.679262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745594_bin.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1462_length_40886_cov_18.679262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME192801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp000432135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.1462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540035" accession="ERS11142002">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142002</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540035</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_virus.1940</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>179.069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1940_length_33254_cov_106.530639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_1940_length_33254_cov_106.530639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME067489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-217;s__CAG-217 sp000436335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.1940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540036" accession="ERS11142003">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142003</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540036</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_virus.347</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.4039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_347_length_112201_cov_28.892672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_347_length_112201_cov_28.892672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738210_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540037" accession="ERS11142004">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142004</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540037</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745594_virus.637</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745594.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745594) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560506) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_637_length_75343_cov_6.559350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_24_1866__NODE_637_length_75343_cov_6.559350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540038" accession="ERS11142005">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142005</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540038</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745596_provirus.181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567174) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_181_length_113394_cov_14.987266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.32432432432432434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_181_length_113394_cov_14.987266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0377001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540039" accession="ERS11142006">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142006</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540039</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745596_provirus.541</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona provirus assembled from ERR7745596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.6619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567174) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_541_length_53030_cov_14.828565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_541_length_53030_cov_14.828565_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;s__Mediterraneibacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745596_provirus.541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540040" accession="ERS11142007">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142007</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540040</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745596_virus.178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>101.011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567174) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_178_length_113673_cov_60.026709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_178_length_113673_cov_60.026709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0378844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540041" accession="ERS11142008">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142008</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540041</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745596_virus.53</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>427.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567174) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_53_length_188618_cov_251.669987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4927536231884058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_53_length_188618_cov_251.669987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540042" accession="ERS11142009">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142009</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540042</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745596_virus.753</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745596.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>455.045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745596) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567174) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_753_length_41006_cov_268.501234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_15_CHE1020TZ__NODE_753_length_41006_cov_268.501234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540043" accession="ERS11142010">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142010</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540043</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_provirus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_112_length_130869_cov_7.382630_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745603_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_112_length_130869_cov_7.382630_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540044" accession="ERS11142011">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142011</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540044</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_provirus.398</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_398_length_72464_cov_7.980204_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_398_length_72464_cov_7.980204_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.1298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540045" accession="ERS11142012">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142012</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540045</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_provirus.9</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_9_length_308769_cov_15.258397_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745603_bin.113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_9_length_308769_cov_15.258397_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME039567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-632;s__CAG-632 sp900539185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540046" accession="ERS11142013">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142013</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540046</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_virus.1233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00247797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_1233_length_34946_cov_6.311394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_1233_length_34946_cov_6.311394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540047" accession="ERS11142014">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142014</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540047</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_virus.1622</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.1090699999999991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_1622_length_29003_cov_5.074915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_1622_length_29003_cov_5.074915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_virus.1622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540048" accession="ERS11142015">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142015</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540048</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_virus.2291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6743672824538927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_2291_length_22436_cov_37.452569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_2291_length_22436_cov_37.452569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540049" accession="ERS11142016">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142016</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540049</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_virus.379</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_379_length_73941_cov_9.782925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_379_length_73941_cov_9.782925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_virus.379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540050" accession="ERS11142017">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142017</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540050</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745603_virus.811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>91.8931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560391) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_811_length_46191_cov_52.734115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_4_1815__NODE_811_length_46191_cov_52.734115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540051" accession="ERS11142018">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142018</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540051</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_provirus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.2699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_162_length_145656_cov_32.545930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745613_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5247524752475248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_162_length_145656_cov_32.545930_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Lactobacillus;s__Lactobacillus paragasseri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738237_provirus.155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540052" accession="ERS11142019">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142019</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540052</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_provirus.59</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_59_length_237141_cov_62.914175_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745613_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_59_length_237141_cov_62.914175_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;s__CAG-488 sp000434055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540053" accession="ERS11142020">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142020</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540053</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_virus.1320</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.3405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_1320_length_36818_cov_10.975940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_1320_length_36818_cov_10.975940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900542795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746779_virus.1100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540054" accession="ERS11142021">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142021</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540054</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_virus.2549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.31881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_2549_length_21752_cov_4.069066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_2549_length_21752_cov_4.069066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540055" accession="ERS11142022">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142022</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540055</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_virus.414</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.6227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_414_length_84923_cov_37.069385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_414_length_84923_cov_37.069385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_virus.414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540056" accession="ERS11142023">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142023</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540056</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745613_virus.699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745613.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.0977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745613) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_699_length_61038_cov_33.913929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745613_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_6_1816__NODE_699_length_61038_cov_33.913929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745989_bin.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UMGS2068;s__UMGS2068 sp900769635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_virus.699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540057" accession="ERS11142024">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142024</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540057</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_provirus.29</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.1612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_29_length_306229_cov_14.880723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9318181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_29_length_306229_cov_14.880723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp900547385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_provirus.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540058" accession="ERS11142025">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142025</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540058</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_provirus.563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_563_length_57799_cov_15.544836_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_563_length_57799_cov_15.544836_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745614_provirus.563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540059" accession="ERS11142026">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142026</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540059</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_virus.1223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_1223_length_34059_cov_15.701695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4117647058823529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_1223_length_34059_cov_15.701695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746233_virus.358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540060" accession="ERS11142027">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142027</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540060</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_virus.2047</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.63764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_2047_length_23522_cov_5.523694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_2047_length_23522_cov_5.523694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738200_provirus.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540061" accession="ERS11142028">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142028</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540061</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_virus.309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_309_length_83800_cov_5.986001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_309_length_83800_cov_5.986001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738613_virus.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540062" accession="ERS11142029">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142029</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540062</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745614_virus.61</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745614.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14430120315873837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745614) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562173) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_61_length_198989_cov_9.009215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4791666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_L_1_1540__NODE_61_length_198989_cov_9.009215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540063" accession="ERS11142030">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142030</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540063</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745621_provirus.105</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.2476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561324) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_105_length_102584_cov_37.486416_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_105_length_102584_cov_37.486416_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738236_provirus.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540064" accession="ERS11142031">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142031</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540064</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745621_virus.248</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.7284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561324) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_248_length_49053_cov_31.569218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_248_length_49053_cov_31.569218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745621_virus.248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540065" accession="ERS11142032">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142032</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540065</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745621_virus.695</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745621.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745621) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561324) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_695_length_19318_cov_11.762746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_14_1278__NODE_695_length_19318_cov_11.762746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745561_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746794_virus.495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540066" accession="ERS11142033">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142033</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540066</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_provirus.38_1</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_38_length_211868_cov_13.177316_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745625_bin.113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.627906976744186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_38_length_211868_cov_13.177316_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.1202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540067" accession="ERS11142034">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142034</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540067</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_provirus.678</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_678_length_59003_cov_6.897787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745625_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_678_length_59003_cov_6.897787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738529_provirus.276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540068" accession="ERS11142035">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142035</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540068</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.1154</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.4192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_1154_length_43085_cov_30.962333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_1154_length_43085_cov_30.962333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540069" accession="ERS11142036">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142036</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540069</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_165_length_118184_cov_11.037932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6949152542372882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_165_length_118184_cov_11.037932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0379068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540070" accession="ERS11142037">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142037</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540070</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.199</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.7368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_199_length_109776_cov_24.320960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_199_length_109776_cov_24.320960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738240_virus.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540071" accession="ERS11142038">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142038</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540071</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.2758</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:47Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_2758_length_23665_cov_14.343225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_2758_length_23665_cov_14.343225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_virus.2758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540072" accession="ERS11142039">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142039</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540072</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.378</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_378_length_81425_cov_21.584808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_378_length_81425_cov_21.584808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540073" accession="ERS11142040">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142040</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540073</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.5265</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>174.496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_5265_length_14533_cov_99.662839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_5265_length_14533_cov_99.662839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.6384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540074" accession="ERS11142041">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142041</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540074</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745625_virus.677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745625.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05708838203908831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745625) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566938) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_677_length_59028_cov_9.019677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_2_1542__NODE_677_length_59028_cov_9.019677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540075" accession="ERS11142042">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142042</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540075</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745634_provirus.99</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745634.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745634) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560393) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_8_1833__NODE_99_length_77468_cov_11.449716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745634_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_8_1833__NODE_99_length_77468_cov_11.449716_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745561_provirus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540076" accession="ERS11142043">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142043</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540076</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_provirus.121</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.5584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_121_length_188042_cov_41.463288_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_121_length_188042_cov_41.463288_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747615_bin.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS363;s__UMGS363 sp900768245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_provirus.121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540077" accession="ERS11142044">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142044</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540077</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_provirus.218</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.8172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_218_length_142038_cov_35.489740_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_218_length_142038_cov_35.489740_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747615_bin.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS363;s__UMGS363 sp900768245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_provirus.218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540078" accession="ERS11142045">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142045</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540078</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_provirus.410</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_410_length_98560_cov_15.148696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_410_length_98560_cov_15.148696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745888_bin.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus_B;s__Enterococcus_B hirae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745621_provirus.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540079" accession="ERS11142046">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142046</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540079</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_provirus.941</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_941_length_61990_cov_6.389773_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_941_length_61990_cov_6.389773_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;s__UMGS973 sp900547295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738591_provirus.382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540080" accession="ERS11142047">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142047</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540080</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.1088</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>186.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1088_length_56025_cov_106.054837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9534883720930232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1088_length_56025_cov_106.054837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745438_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp004553545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745556_virus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540081" accession="ERS11142048">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142048</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540081</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.1491</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.8221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1491_length_46029_cov_20.048377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1491_length_46029_cov_20.048377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.1491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540082" accession="ERS11142049">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142049</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540082</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.1814</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.92195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1814_length_40188_cov_5.733739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1814_length_40188_cov_5.733739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540083" accession="ERS11142050">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142050</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540083</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.1988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1988_length_37221_cov_10.645972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_1988_length_37221_cov_10.645972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540084" accession="ERS11142051">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142051</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540084</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.25620</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_25620_length_3430_cov_16.072771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_25620_length_3430_cov_16.072771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_virus.22944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540085" accession="ERS11142052">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142052</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540085</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.3836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.81283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_3836_length_20875_cov_3.130253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_3836_length_20875_cov_3.130253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540086" accession="ERS11142053">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142053</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540086</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745648_virus.836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745648.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.0466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745648) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561548) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_836_length_67507_cov_36.866751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745648_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48148148148148145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_B_22_1164__NODE_836_length_67507_cov_36.866751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540087" accession="ERS11142054">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142054</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540087</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_provirus.161</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_161_length_123616_cov_7.573204_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745675_bin.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8305084745762712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_161_length_123616_cov_7.573204_2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_virus.576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540088" accession="ERS11142055">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142055</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540088</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_provirus.337</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_337_length_88919_cov_67.932937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_337_length_88919_cov_67.932937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_provirus.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540089" accession="ERS11142056">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142056</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540089</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_provirus.676</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>592.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_676_length_60650_cov_375.683836_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_676_length_60650_cov_375.683836_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540090" accession="ERS11142057">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142057</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540090</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_virus.1219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_1219_length_42303_cov_6.608369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_1219_length_42303_cov_6.608369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540091" accession="ERS11142058">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142058</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540091</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_virus.2103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.3528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_2103_length_27933_cov_17.980076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_2103_length_27933_cov_17.980076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540092" accession="ERS11142059">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142059</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540092</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_virus.3057</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07943336174904672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.7321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_3057_length_20254_cov_15.920900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_3057_length_20254_cov_15.920900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.3057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540093" accession="ERS11142060">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142060</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540093</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745675_virus.586</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745675.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745675) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560209) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_586_length_65076_cov_13.614656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_24_2253__NODE_586_length_65076_cov_13.614656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745675_virus.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540094" accession="ERS11142061">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142061</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540094</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_provirus.116</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacillus virus G provirus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>144.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_116_length_191092_cov_85.171405_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7358490566037735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_116_length_191092_cov_85.171405_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_provirus.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Donellivirus; Bacillus virus G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540095" accession="ERS11142062">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142062</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540095</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_provirus.454</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>385.834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_454_length_90324_cov_249.305240_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_454_length_90324_cov_249.305240_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540096" accession="ERS11142063">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142063</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540096</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_provirus.82</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_82_length_219483_cov_14.695122_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745687_bin.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_82_length_219483_cov_14.695122_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738192_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Sphaerochaetales;f__Sphaerochaetaceae;g__UBA9732;s__UBA9732 sp001940825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_provirus.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540097" accession="ERS11142064">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142064</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540097</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.1203</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8636976579134139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1203_length_47662_cov_12.083451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5227272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1203_length_47662_cov_12.083451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-4429795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540098" accession="ERS11142065">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142065</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540098</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.1322</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1088591976631789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>291.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1322_length_44450_cov_175.929281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.34782608695652173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1322_length_44450_cov_175.929281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540099" accession="ERS11142066">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142066</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540099</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>109.422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_145_length_167155_cov_64.392673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6865671641791045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_145_length_167155_cov_64.392673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540100" accession="ERS11142067">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142067</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540100</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.1594</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1594_length_38443_cov_7.330736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1594_length_38443_cov_7.330736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540101" accession="ERS11142068">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142068</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540101</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.1716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1716_length_36066_cov_9.684820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1716_length_36066_cov_9.684820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540102" accession="ERS11142069">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142069</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540102</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.1908</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1370737509399937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1908_length_33293_cov_7.425518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_1908_length_33293_cov_7.425518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540103" accession="ERS11142070">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142070</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540103</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.2487</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12698150709611442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_2487_length_27165_cov_5.956807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_2487_length_27165_cov_5.956807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_provirus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540104" accession="ERS11142071">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142071</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540104</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.304</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_304_length_113885_cov_11.254762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9777777777777776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_304_length_113885_cov_11.254762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540105" accession="ERS11142072">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142072</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540105</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.3502</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.22464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_3502_length_20961_cov_4.075800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_3502_length_20961_cov_4.075800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.3609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540106" accession="ERS11142073">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142073</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540106</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>132.939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_452_length_90472_cov_83.033774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_452_length_90472_cov_83.033774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745990_bin.104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;g__UBA1731;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540107" accession="ERS11142074">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142074</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540107</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.66</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.6296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_66_length_233716_cov_44.105291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.492063492063492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_66_length_233716_cov_44.105291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;s__Catenibacterium mitsuokai</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540108" accession="ERS11142075">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142075</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540108</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.79</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_79_length_220340_cov_14.658413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_79_length_220340_cov_14.658413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540109" accession="ERS11142076">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142076</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540109</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745687_virus.974</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745687.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745687) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561781) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_974_length_55466_cov_8.916608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_6_1094__NODE_974_length_55466_cov_8.916608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME013942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-724;s__CAG-724 sp003524145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540110" accession="ERS11142077">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142077</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540110</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_provirus.1398</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16243983917627094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1398_length_47045_cov_12.622147_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745719_bin.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1398_length_47045_cov_12.622147_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;g__UBA2883;s__UBA2883 sp900768915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540111" accession="ERS11142078">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142078</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540111</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_provirus.331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.6658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_331_length_109901_cov_16.545536_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745719_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_331_length_109901_cov_16.545536_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540112" accession="ERS11142079">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142079</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540112</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.1059</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1059_length_57493_cov_7.485980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5128205128205128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1059_length_57493_cov_7.485980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.1178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540113" accession="ERS11142080">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142080</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540113</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.1560</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>119.953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1560_length_43408_cov_70.990746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1560_length_43408_cov_70.990746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540114" accession="ERS11142081">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142081</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540114</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.1731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.7895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1731_length_40124_cov_23.619447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_1731_length_40124_cov_23.619447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540115" accession="ERS11142082">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142082</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540115</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.2113</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>96.531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_2113_length_34575_cov_56.350687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_2113_length_34575_cov_56.350687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME079077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000434935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.2011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540116" accession="ERS11142083">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142083</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540116</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.3143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Erysipelothrix phage SE-1 virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.22699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_3143_length_25415_cov_4.670771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_3143_length_25415_cov_4.670771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.1576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Erysipelothrix phage SE-1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540117" accession="ERS11142084">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142084</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540117</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.4006</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_4006_length_20587_cov_7.744320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_4006_length_20587_cov_7.744320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.4006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540119" accession="ERS11142086">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142086</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540119</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745722_provirus.122</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745722.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745722) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559305) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_122_length_78540_cov_5.818042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745722_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_122_length_78540_cov_5.818042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME177178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus_A;s__Enterococcus_A avium</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745722_provirus.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540120" accession="ERS11142087">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142087</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540120</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745722_provirus.70</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lactobacillus phage LR1 provirus assembled from ERR7745722.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.6327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745722) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559305) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_70_length_122982_cov_13.518075_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745722_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_70_length_122982_cov_13.518075_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745722_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Limosilactobacillus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745722_provirus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lactobacillus phage LR1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540121" accession="ERS11142088">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142088</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540121</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745722_virus.233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lactobacillus phage vB_Lcr_AB1 virus assembled from ERR7745722.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745722) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559305) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_233_length_45587_cov_5.238189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_233_length_45587_cov_5.238189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745722_virus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lactobacillus phage vB_Lcr_AB1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540122" accession="ERS11142089">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142089</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540122</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745722_virus.279</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Uetakevirus virus assembled from ERR7745722.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.8321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745722) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559305) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_279_length_39344_cov_22.561642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_2_2614__NODE_279_length_39344_cov_22.561642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745722_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Uetakevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540123" accession="ERS11142090">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142090</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540123</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_provirus.102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.7126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_102_length_175734_cov_10.993926_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745743_bin.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_102_length_175734_cov_10.993926_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_virus.2186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540124" accession="ERS11142091">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142091</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540124</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_provirus.237</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12951046580442682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_237_length_117625_cov_13.478928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745743_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_237_length_117625_cov_13.478928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746433_provirus.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540125" accession="ERS11142092">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142092</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540125</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_provirus.489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis provirus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.8405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_489_length_78521_cov_11.927936_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8431372549019608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_489_length_78521_cov_11.927936_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745881_provirus.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540126" accession="ERS11142093">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142093</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540126</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.1204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.7449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1204_length_47219_cov_16.367273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5490196078431373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1204_length_47219_cov_16.367273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540127" accession="ERS11142094">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142094</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540127</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.1462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>247.603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1462_length_41615_cov_144.200082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1462_length_41615_cov_144.200082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540128" accession="ERS11142095">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142095</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540128</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.1753</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1753_length_36847_cov_8.753413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_1753_length_36847_cov_8.753413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540129" accession="ERS11142096">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142096</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540129</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.2117</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05495594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_2117_length_32900_cov_12.491728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_2117_length_32900_cov_12.491728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.1219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540130" accession="ERS11142097">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142097</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540130</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.2836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_2836_length_26963_cov_14.796139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_2836_length_26963_cov_14.796139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540131" accession="ERS11142098">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142098</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540131</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.393</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_393_length_89545_cov_6.728082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8985507246376812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_393_length_89545_cov_6.728082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;s__Lachnospira sp900545725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738181_provirus.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540132" accession="ERS11142099">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142099</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540132</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745743_virus.836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745743.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745743) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_836_length_58529_cov_18.206374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_B_15_1593__NODE_836_length_58529_cov_18.206374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540133" accession="ERS11142100">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142100</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540133</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_provirus.123</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>106.609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_123_length_194594_cov_65.446228_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745755_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8809523809523809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_123_length_194594_cov_65.446228_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_provirus.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540134" accession="ERS11142101">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142101</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540134</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_provirus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_272_length_132929_cov_6.369012_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8923076923076924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_272_length_132929_cov_6.369012_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME166984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_I</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_provirus.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540135" accession="ERS11142102">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142102</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540135</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_provirus.77</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.7656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_77_length_234124_cov_11.743573_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745755_bin.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_77_length_234124_cov_11.743573_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_provirus.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540136" accession="ERS11142103">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142103</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540136</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.1051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01993392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_1051_length_59634_cov_4.849690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_1051_length_59634_cov_4.849690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540137" accession="ERS11142104">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142104</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540137</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.1543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.2792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_1543_length_44499_cov_15.772815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_1543_length_44499_cov_15.772815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738352_provirus.448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540138" accession="ERS11142105">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142105</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540138</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_178_length_163373_cov_8.819530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7017543859649122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_178_length_163373_cov_8.819530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_virus.178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540139" accession="ERS11142106">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142106</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540139</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.2085</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_2085_length_35496_cov_14.038059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745755_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_2085_length_35496_cov_14.038059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1781;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738167_provirus.974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540140" accession="ERS11142107">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142107</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540140</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.2666</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.038218149912259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_2666_length_28755_cov_8.605028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5217391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_2666_length_28755_cov_8.605028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.2963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540141" accession="ERS11142108">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142108</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540141</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.3632</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.89875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_3632_length_21869_cov_4.457048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_3632_length_21869_cov_4.457048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.1544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540142" accession="ERS11142109">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142109</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540142</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745755_virus.549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745755.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6247.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745755) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567306) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_549_length_87891_cov_3598.597695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8536585365853658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_23_RAJ1014YZ__NODE_549_length_87891_cov_3598.597695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745755_virus.549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540143" accession="ERS11142110">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142110</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540143</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_provirus.172</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>341.722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_172_length_153099_cov_220.004757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745759_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_172_length_153099_cov_220.004757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540144" accession="ERS11142111">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142111</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540144</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_provirus.455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. provirus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18491189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_455_length_94197_cov_7.358882_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9863013698630136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_455_length_94197_cov_7.358882_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540145" accession="ERS11142112">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142112</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540145</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_provirus.730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_730_length_73534_cov_12.755912_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745759_bin.101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_730_length_73534_cov_12.755912_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.1407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540146" accession="ERS11142113">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142113</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540146</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.1188</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1188_length_55882_cov_7.964197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1188_length_55882_cov_7.964197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_provirus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540147" accession="ERS11142114">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142114</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540147</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.1543</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01993392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1543_length_47462_cov_10.069009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9629629629629628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1543_length_47462_cov_10.069009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540148" accession="ERS11142115">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142115</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540148</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.1724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1724_length_44410_cov_10.102655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1724_length_44410_cov_10.102655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540149" accession="ERS11142116">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142116</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540149</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.1913</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.1027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1913_length_41822_cov_40.800647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_1913_length_41822_cov_40.800647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;s__UMGS973 sp900547295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_provirus.500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540150" accession="ERS11142117">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142117</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540150</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.2121</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2121_length_39333_cov_10.365931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2121_length_39333_cov_10.365931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter sp900547695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.2121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540151" accession="ERS11142118">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142118</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540151</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.2343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.4778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2343_length_36482_cov_33.645296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2343_length_36482_cov_33.645296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540152" accession="ERS11142119">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142119</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540152</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.2842</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24730176211453747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.5501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2842_length_31629_cov_32.744232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_2842_length_31629_cov_32.744232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_provirus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540153" accession="ERS11142120">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142120</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540153</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.3734</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_3734_length_25155_cov_6.766768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745759_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_3734_length_25155_cov_6.766768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0316936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540154" accession="ERS11142121">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142121</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540154</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.515</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.0189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_515_length_87750_cov_15.974907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_515_length_87750_cov_15.974907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540156" accession="ERS11142123">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142123</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540156</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.908</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_908_length_65210_cov_8.790168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6944444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_908_length_65210_cov_8.790168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540157" accession="ERS11142124">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142124</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540157</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745760_provirus.213</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus provirus assembled from ERR7745760.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.7546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745760) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567205) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_213_length_138085_cov_52.188706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_213_length_138085_cov_52.188706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME280550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Sutterella;s__Sutterella wadsworthensis_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745760_provirus.213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540158" accession="ERS11142125">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142125</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540158</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745760_virus.1217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745760.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.7445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745760) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567205) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_1217_length_41476_cov_13.780792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_1217_length_41476_cov_13.780792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738533_virus.771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540159" accession="ERS11142126">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142126</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540159</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745760_virus.1796</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745760.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.7452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745760) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567205) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_1796_length_29333_cov_43.505093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_1796_length_29333_cov_43.505093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Sutterella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745777_provirus.127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540160" accession="ERS11142127">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142127</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540160</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745760_virus.376</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7745760.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1323425218357491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745760) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567205) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_376_length_100433_cov_25.045667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_23_RAJ0016YZ__NODE_376_length_100433_cov_25.045667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola dorei</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0368143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540162" accession="ERS11142129">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142129</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540162</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745769_provirus.424</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745769.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>210.791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745769) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567476) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_424_length_64027_cov_130.280547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_424_length_64027_cov_130.280547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_provirus.424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540163" accession="ERS11142130">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142130</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540163</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745769_virus.2030</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745769.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14641938938836815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.0696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745769) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567476) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_2030_length_19205_cov_46.184285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_2030_length_19205_cov_46.184285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__Olsenella_E sp003609875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_virus.2030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540164" accession="ERS11142131">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142131</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540164</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745769_virus.724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745769.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>220.995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745769) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567476) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_724_length_44345_cov_129.011995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_724_length_44345_cov_129.011995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745769_virus.724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540165" accession="ERS11142132">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142132</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540165</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745769_virus.903</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745769.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745769) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567476) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_903_length_38311_cov_5.853298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_903_length_38311_cov_5.853298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738619_virus.1094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540166" accession="ERS11142133">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142133</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540166</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745777_provirus.327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745777.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745777) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567368) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_327_length_69149_cov_5.995555_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7804878048780488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_327_length_69149_cov_5.995555_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0333119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540167" accession="ERS11142134">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142134</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540167</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745777_virus.1325</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745777.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.5319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745777) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567368) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_1325_length_17737_cov_18.685641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_1325_length_17737_cov_18.685641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745777_virus.1325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540170" accession="ERS11142136">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142136</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540170</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_provirus.181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.6662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_181_length_136706_cov_20.834640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8809523809523809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_181_length_136706_cov_20.834640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738451_provirus.543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540171" accession="ERS11142137">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142137</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540171</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_provirus.94</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.1056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_94_length_182926_cov_53.073503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745779_bin.159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_94_length_182926_cov_53.073503_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_provirus.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540172" accession="ERS11142138">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142138</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540172</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_virus.1165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.5595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_1165_length_40637_cov_30.496252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_1165_length_40637_cov_30.496252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_virus.1165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540173" accession="ERS11142139">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142139</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540173</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_virus.1269</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.80167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_1269_length_38168_cov_4.864404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_1269_length_38168_cov_4.864404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540174" accession="ERS11142140">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142140</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540174</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_virus.223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_223_length_121259_cov_8.683872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5102040816326531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_223_length_121259_cov_8.683872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_virus.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540176" accession="ERS11142142">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142142</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540176</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_virus.776</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.3278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_776_length_56409_cov_30.274533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_776_length_56409_cov_30.274533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002299635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_virus.776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540177" accession="ERS11142143">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142143</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540177</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_provirus.1007</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04719162995594713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_1007_length_60728_cov_8.787473_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745783_bin.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_1007_length_60728_cov_8.787473_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745783_provirus.1007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540178" accession="ERS11142144">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142144</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540178</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_provirus.218</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3523678414096916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.2315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_218_length_137504_cov_56.899576_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745783_bin.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9142857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_218_length_137504_cov_56.899576_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_provirus.1534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540179" accession="ERS11142145">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142145</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540179</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_provirus.441</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.28243392070484574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>211.853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_441_length_97722_cov_120.363726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_441_length_97722_cov_120.363726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_virus.854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540180" accession="ERS11142146">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142146</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540180</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_provirus.646</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6797356828193831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.3729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_646_length_79125_cov_46.656892_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745783_bin.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_646_length_79125_cov_46.656892_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME285832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002300055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738263_virus.1781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540181" accession="ERS11142147">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142147</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540181</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.10707</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_10707_length_10301_cov_10.630086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_10707_length_10301_cov_10.630086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745783_virus.10707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540182" accession="ERS11142148">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142148</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540182</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.139</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.4067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_139_length_171720_cov_32.184587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_139_length_171720_cov_32.184587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME286345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540183" accession="ERS11142149">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142149</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540183</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.1755</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseobacter phage CRP-7 virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6099118942731276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_1755_length_41627_cov_20.459230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_1755_length_41627_cov_20.459230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738552_provirus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Roseobacter phage CRP-7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540185" accession="ERS11142151">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142151</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540185</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.27132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Inoviridae virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>4.18477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_27132_length_4561_cov_2.257583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_27132_length_4561_cov_2.257583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738185_bin.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_J;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745783_virus.27132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540186" accession="ERS11142152">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142152</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540186</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.4107</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.11835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_4107_length_22684_cov_3.946565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_4107_length_22684_cov_3.946565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540187" accession="ERS11142153">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142153</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540187</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.5694</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_5694_length_17422_cov_9.132257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_5694_length_17422_cov_9.132257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_virus.5089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540188" accession="ERS11142154">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142154</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540188</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.900</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>147.445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_900_length_65279_cov_82.400770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8780487804878049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_900_length_65279_cov_82.400770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745783_virus.900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540189" accession="ERS11142155">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142155</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540189</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_provirus.1606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.38513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1606_length_43985_cov_5.396192_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1606_length_43985_cov_5.396192_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_provirus.1606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540191" accession="ERS11142157">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142157</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540191</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_provirus.838</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_838_length_68409_cov_6.309723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_838_length_68409_cov_6.309723_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_provirus.838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540192" accession="ERS11142158">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142158</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540192</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.1300</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.6235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1300_length_50722_cov_9.268457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1300_length_50722_cov_9.268457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.1300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540193" accession="ERS11142159">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142159</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540193</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.1576</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1576_length_44554_cov_13.471007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1576_length_44554_cov_13.471007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.1576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540194" accession="ERS11142160">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142160</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540194</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.1875</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>243.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1875_length_39534_cov_143.463644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6410256410256411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_1875_length_39534_cov_143.463644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540195" accession="ERS11142161">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142161</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540195</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.2246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_2246_length_34294_cov_6.911068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.53125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_2246_length_34294_cov_6.911068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.2246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540196" accession="ERS11142162">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142162</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540196</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.2815</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.15846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_2815_length_28548_cov_4.635454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_2815_length_28548_cov_4.635454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.2815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540198" accession="ERS11142164">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142164</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540198</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.95</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_95_length_197729_cov_13.758191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6304347826086957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_95_length_197729_cov_13.758191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_virus.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540199" accession="ERS11142165">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142165</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540199</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_provirus.1246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>109.716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1246_length_52590_cov_64.338107_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1246_length_52590_cov_64.338107_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_provirus.1246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540200" accession="ERS11142166">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142166</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540200</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_provirus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_162_length_146826_cov_64.445291_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_162_length_146826_cov_64.445291_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_provirus.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540201" accession="ERS11142167">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142167</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540201</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_provirus.2722</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.9181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2722_length_31646_cov_30.142830_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2722_length_31646_cov_30.142830_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.2830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540202" accession="ERS11142168">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142168</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540202</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_provirus.533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_533_length_83976_cov_9.732273_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_533_length_83976_cov_9.732273_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540204" accession="ERS11142170">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142170</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540204</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.1193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1193_length_53887_cov_16.730069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.59375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1193_length_53887_cov_16.730069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540205" accession="ERS11142171">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142171</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540205</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.1639</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1639_length_44317_cov_10.641817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_1639_length_44317_cov_10.641817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.1407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540206" accession="ERS11142172">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142172</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540206</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.199</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_199_length_137122_cov_12.050969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_199_length_137122_cov_12.050969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540207" accession="ERS11142173">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142173</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540207</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.2183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11497797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.5405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2183_length_36651_cov_33.717805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2183_length_36651_cov_33.717805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540208" accession="ERS11142174">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142174</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540208</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.23675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.18395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_23675_length_5260_cov_3.892533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_23675_length_5260_cov_3.892533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738530_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium sp900539945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0010187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540209" accession="ERS11142175">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142175</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540209</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.2890</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2890_length_30333_cov_18.891955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_2890_length_30333_cov_18.891955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540210" accession="ERS11142176">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142176</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540210</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.3858</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_3858_length_24324_cov_14.404091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_3858_length_24324_cov_14.404091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.2093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540211" accession="ERS11142177">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142177</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540211</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.4620</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05927917672702029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_4620_length_21117_cov_9.431939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_4620_length_21117_cov_9.431939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter_A;s__Mediterraneibacter_A butyricigenes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.4620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540213" accession="ERS11142179">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142179</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540213</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.0529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_960_length_61115_cov_20.622186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6551724137931034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_960_length_61115_cov_20.622186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_virus.467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540214" accession="ERS11142180">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142180</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540214</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_provirus.1742</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07725770925110134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>200.673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1742_length_35026_cov_114.780852_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1742_length_35026_cov_114.780852_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.4060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540215" accession="ERS11142181">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142181</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540215</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_provirus.447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_447_length_90888_cov_8.788186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745816_bin.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_447_length_90888_cov_8.788186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.2157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540216" accession="ERS11142182">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142182</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540216</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.1026</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1026_length_52457_cov_11.111798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1026_length_52457_cov_11.111798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738219_virus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540217" accession="ERS11142183">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142183</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540217</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.1295</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1295_length_43517_cov_98.031860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1295_length_43517_cov_98.031860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745924_bin.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__Firm-10;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3571550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540219" accession="ERS11142185">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142185</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540219</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.1699</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1699_length_35750_cov_5.972612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1699_length_35750_cov_5.972612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540220" accession="ERS11142186">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142186</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540220</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.2116</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14975453690774357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_2116_length_29990_cov_8.211781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_2116_length_29990_cov_8.211781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745594_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__UBA636 sp900546285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1081591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540221" accession="ERS11142187">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142187</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540221</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.350</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03799472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>503.812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_350_length_104493_cov_282.803555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9850746268656716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_350_length_104493_cov_282.803555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0369190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540222" accession="ERS11142188">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142188</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540222</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.711</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.9124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_711_length_66617_cov_23.545582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_711_length_66617_cov_23.545582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540223" accession="ERS11142189">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142189</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540223</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_provirus.1164</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.67055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1164_length_49909_cov_5.061266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1164_length_49909_cov_5.061266_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747255_bin.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-884;s__CAG-884 sp000433875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.1164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540224" accession="ERS11142190">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142190</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540224</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_provirus.178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.3588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_178_length_139561_cov_58.871699_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745819_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4482758620689655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_178_length_139561_cov_58.871699_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME286345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738268_virus.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540225" accession="ERS11142191">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142191</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540225</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_provirus.553</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>421.651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_553_length_76825_cov_250.192161_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_553_length_76825_cov_250.192161_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540227" accession="ERS11142193">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142193</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540227</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_virus.1636</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.4108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1636_length_39953_cov_4.148861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1636_length_39953_cov_4.148861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540228" accession="ERS11142194">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142194</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540228</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_virus.2143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10236784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.8718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_2143_length_32934_cov_29.029004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_2143_length_32934_cov_29.029004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_provirus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540229" accession="ERS11142195">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142195</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540229</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_virus.3693</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19741189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.83556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_3693_length_21440_cov_4.577353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_3693_length_21440_cov_4.577353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738227_bin.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp004557855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745410_virus.1183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540230" accession="ERS11142196">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142196</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540230</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_virus.596</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Schitoviridae virus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06303277487972782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_596_length_73968_cov_12.564318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_596_length_73968_cov_12.564318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738189_virus.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Schitoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540231" accession="ERS11142197">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142197</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540231</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_provirus.1466</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1466_length_51893_cov_9.575807_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745823_bin.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1466_length_51893_cov_9.575807_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_provirus.1466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540233" accession="ERS11142199">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142199</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540233</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_provirus.360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.1141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_360_length_115057_cov_24.262280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3235294117647059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_360_length_115057_cov_24.262280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_provirus.360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540234" accession="ERS11142200">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142200</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540234</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_provirus.566</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_566_length_90825_cov_16.494898_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745823_bin.124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_566_length_90825_cov_16.494898_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_provirus.566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540235" accession="ERS11142201">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142201</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540235</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_provirus.751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_751_length_77485_cov_7.834901_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745823_bin.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_751_length_77485_cov_7.834901_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME110833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__COE1;s__COE1 sp900753305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_provirus.751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540236" accession="ERS11142202">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142202</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540236</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.1064</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1064_length_63454_cov_11.110087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1064_length_63454_cov_11.110087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540237" accession="ERS11142203">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142203</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540237</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.1480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1480_length_51558_cov_6.142791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6590909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_1480_length_51558_cov_6.142791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540238" accession="ERS11142204">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142204</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540238</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.2039</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16375920696193652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.7813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_2039_length_41232_cov_13.249812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_2039_length_41232_cov_13.249812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540239" accession="ERS11142205">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142205</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540239</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.2437</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_2437_length_36583_cov_6.892429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_2437_length_36583_cov_6.892429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.2437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540240" accession="ERS11142206">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142206</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540240</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.27966</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06798872201629169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_27966_length_5558_cov_6.601168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_27966_length_5558_cov_6.601168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.27966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540242" accession="ERS11142208">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142208</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540242</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.5560</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_5560_length_20471_cov_6.070315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_5560_length_20471_cov_6.070315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.1931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540243" accession="ERS11142209">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142209</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540243</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.9104</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02984581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_9104_length_14155_cov_12.190767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_9104_length_14155_cov_12.190767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.9104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540244" accession="ERS11142210">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142210</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540244</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_provirus.221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>249.289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_221_length_112949_cov_137.398823_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745828_bin.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_221_length_112949_cov_137.398823_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_provirus.221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540245" accession="ERS11142211">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142211</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540245</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_provirus.516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>105.587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_516_length_74676_cov_62.125605_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_516_length_74676_cov_62.125605_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_provirus.516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540246" accession="ERS11142212">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142212</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540246</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_provirus.861</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>306.366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_861_length_55530_cov_168.583142_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_861_length_55530_cov_168.583142_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_provirus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540248" accession="ERS11142214">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142214</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540248</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_virus.1982</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0752082847671273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_1982_length_30929_cov_14.808829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_1982_length_30929_cov_14.808829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.2075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540249" accession="ERS11142215">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142215</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540249</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_virus.279</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_279_length_102194_cov_17.296924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5813953488372093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_279_length_102194_cov_17.296924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540250" accession="ERS11142216">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142216</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540250</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_virus.4150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18068787764054067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_4150_length_16540_cov_6.407398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_4150_length_16540_cov_6.407398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.4150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540251" accession="ERS11142217">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142217</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540251</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_virus.821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.6801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_821_length_57359_cov_45.704305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_821_length_57359_cov_45.704305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540252" accession="ERS11142218">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142218</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540252</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745831_provirus.190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7745831.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11873985389093908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745831) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567123) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_190_length_98825_cov_12.673178_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_190_length_98825_cov_12.673178_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540253" accession="ERS11142219">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142219</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540253</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745831_provirus.716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745831.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9061536050499776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745831) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567123) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_716_length_48864_cov_18.903806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_716_length_48864_cov_18.903806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738353_virus.937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540254" accession="ERS11142220">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142220</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540254</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745831_virus.1240</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745831.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.5654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745831) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567123) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_1240_length_34620_cov_40.811366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7941176470588235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_1240_length_34620_cov_40.811366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R sp900542875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_virus.1240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540256" accession="ERS11142222">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142222</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540256</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745831_virus.807</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745831.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>78.2589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745831) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567123) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_807_length_45704_cov_45.386723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_807_length_45704_cov_45.386723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.1079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540257" accession="ERS11142223">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142223</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540257</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_provirus.1053</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1053_length_53311_cov_12.072416_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745835_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1053_length_53311_cov_12.072416_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0278013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540258" accession="ERS11142224">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142224</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540258</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_provirus.188</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_188_length_131701_cov_7.565148_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6274509803921569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_188_length_131701_cov_7.565148_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_provirus.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540259" accession="ERS11142225">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142225</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540259</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_provirus.495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.8758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_495_length_81458_cov_17.173407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745835_bin.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_495_length_81458_cov_17.173407_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_provirus.495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540260" accession="ERS11142226">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142226</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540260</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_provirus.86</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_86_length_182266_cov_12.567510_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745835_bin.185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7037037037037037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_86_length_182266_cov_12.567510_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540262" accession="ERS11142228">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142228</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540262</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.1741</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07834114492378066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.8748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1741_length_39405_cov_13.170718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745835_bin.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1741_length_39405_cov_13.170718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus thermophilus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_virus.1741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540263" accession="ERS11142229">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142229</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540263</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.2076</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.32079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_2076_length_34855_cov_5.109149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_2076_length_34855_cov_5.109149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540264" accession="ERS11142230">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142230</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540264</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.2827</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_2827_length_27385_cov_7.046397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_2827_length_27385_cov_7.046397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0299347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540265" accession="ERS11142231">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142231</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540265</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>114.353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_533_length_78077_cov_68.699397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.912280701754386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_533_length_78077_cov_68.699397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_virus.533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540266" accession="ERS11142232">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142232</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540266</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.980</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>223.031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_980_length_55850_cov_136.395801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_980_length_55850_cov_136.395801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_virus.1999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540267" accession="ERS11142233">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142233</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540267</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_provirus.153</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.6878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_153_length_117008_cov_34.156571_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745836_bin.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_153_length_117008_cov_34.156571_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738599_virus.1890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540268" accession="ERS11142234">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142234</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540268</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_provirus.353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.129625550660793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_353_length_73311_cov_14.652757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_353_length_73311_cov_14.652757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_provirus.353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540269" accession="ERS11142235">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142235</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540269</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_provirus.770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_770_length_45991_cov_12.816243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_770_length_45991_cov_12.816243_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_provirus.770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540271" accession="ERS11142237">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142237</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540271</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_virus.210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11416849755093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>126.646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_210_length_96420_cov_70.798086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9315068493150684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_210_length_96420_cov_70.798086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540272" accession="ERS11142238">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142238</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540272</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_virus.569</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20722793596202008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>115.569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_569_length_55452_cov_66.593156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_569_length_55452_cov_66.593156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738619_provirus.454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540273" accession="ERS11142239">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142239</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540273</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_virus.923</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_923_length_40842_cov_13.113112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_923_length_40842_cov_13.113112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_virus.923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540274" accession="ERS11142240">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142240</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540274</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_provirus.1465</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25195538818076474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.4369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1465_length_52166_cov_27.756091_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1465_length_52166_cov_27.756091_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.1465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540275" accession="ERS11142241">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142241</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540275</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_provirus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10290470637156364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_217_length_145508_cov_9.026851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745845_bin.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_217_length_145508_cov_9.026851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_provirus.2338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540277" accession="ERS11142243">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142243</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540277</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_provirus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_850_length_72917_cov_7.001840_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_850_length_72917_cov_7.001840_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738151_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter sp900548765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540278" accession="ERS11142244">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142244</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540278</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.1098</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1098_length_61864_cov_10.674851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1098_length_61864_cov_10.674851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0339891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540279" accession="ERS11142245">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142245</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540279</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.1274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.8037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1274_length_57114_cov_28.064555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1274_length_57114_cov_28.064555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540280" accession="ERS11142246">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142246</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540280</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.1593</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.6991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1593_length_49375_cov_28.576900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1593_length_49375_cov_28.576900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900542795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540281" accession="ERS11142247">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142247</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540281</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.1935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1935_length_43517_cov_10.072629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745845_bin.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_1935_length_43517_cov_10.072629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540282" accession="ERS11142248">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142248</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540282</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.22137</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Microviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.42005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_22137_length_5178_cov_3.226034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_22137_length_5178_cov_3.226034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME153344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900549705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.23843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540283" accession="ERS11142249">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142249</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540283</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.2388</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_2388_length_37452_cov_6.448107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_2388_length_37452_cov_6.448107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME285832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002300055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.1352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540285" accession="ERS11142251">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142251</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540285</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.465</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>133.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_465_length_101971_cov_77.212378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4838709677419355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_465_length_101971_cov_77.212378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME172369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-917;g__CAG-349;s__CAG-349 sp003539515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540286" accession="ERS11142252">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142252</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540286</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.6807</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05011329479852357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_6807_length_16174_cov_11.674722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_6807_length_16174_cov_11.674722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME239799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eisenbergiella;s__Eisenbergiella sp900066775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.7813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540287" accession="ERS11142253">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142253</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540287</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745852_provirus.203</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745852.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.5418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745852) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560337) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_203_length_116847_cov_50.470455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_203_length_116847_cov_50.470455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745852_provirus.203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540288" accession="ERS11142254">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142254</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540288</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745852_provirus.762</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745852.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12525657272474092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1598.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745852) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560337) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_762_length_43084_cov_924.106122_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745852_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_762_length_43084_cov_924.106122_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides ovatus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0322078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540289" accession="ERS11142255">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142255</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540289</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745852_virus.532</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745852.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2557.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745852) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560337) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_532_length_59181_cov_1469.854240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.32142857142857145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_O_23_1813__NODE_532_length_59181_cov_1469.854240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides ovatus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745852_virus.532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540291" accession="ERS11142257">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142257</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540291</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_provirus.171</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>198.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_171_length_161602_cov_121.874744_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745854_bin.346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_171_length_161602_cov_121.874744_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_provirus.431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540292" accession="ERS11142258">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142258</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540292</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_provirus.248</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_248_length_137943_cov_7.944272_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745854_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5161290322580645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_248_length_137943_cov_7.944272_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540293" accession="ERS11142259">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142259</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540293</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_provirus.379</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_379_length_112480_cov_13.035497_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_379_length_112480_cov_13.035497_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738606_virus.1947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540294" accession="ERS11142260">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142260</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540294</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_provirus.786</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_786_length_79151_cov_8.485381_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745854_bin.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_786_length_79151_cov_8.485381_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_provirus.786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540295" accession="ERS11142261">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142261</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540295</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.1111</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.0794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1111_length_64239_cov_71.172641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1111_length_64239_cov_71.172641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.1111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540296" accession="ERS11142262">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142262</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540296</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.1366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1366_length_57031_cov_6.586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1366_length_57031_cov_6.586017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME013942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-724;s__CAG-724 sp003524145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745398_virus.695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540297" accession="ERS11142263">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142263</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540297</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.1887</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Paenibacillus phage vB_PlaP_API480 virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2023678414096917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.18969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1887_length_45434_cov_5.319576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_1887_length_45434_cov_5.319576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Paenibacillus phage vB_PlaP_API480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540298" accession="ERS11142264">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142264</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540298</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.2185</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_2185_length_41326_cov_7.654780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_2185_length_41326_cov_7.654780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540300" accession="ERS11142266">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142266</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540300</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.2756</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3368672824538925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.56061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_2756_length_34882_cov_4.809252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_2756_length_34882_cov_4.809252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540301" accession="ERS11142267">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142267</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540301</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.3244</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06384348183251046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540301</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_3244_length_31217_cov_16.618433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745854_bin.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_3244_length_31217_cov_16.618433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540302" accession="ERS11142268">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142268</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540302</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12878892628021937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_495_length_98774_cov_10.024519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_495_length_98774_cov_10.024519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540303" accession="ERS11142269">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142269</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540303</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.6623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06574293217546884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.22254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_6623_length_18097_cov_4.091176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_6623_length_18097_cov_4.091176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745429_provirus.538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540304" accession="ERS11142270">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142270</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540304</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.9842</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01996515875021345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_9842_length_13262_cov_28.392188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_9842_length_13262_cov_28.392188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.5711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540306" accession="ERS11142272">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142272</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540306</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_provirus.2360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.7879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2360_length_36729_cov_26.450344_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745858_bin.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2360_length_36729_cov_26.450344_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745908_bin.180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales;f__Monoglobaceae;g__UMGS1820;s__UMGS1820 sp902797885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745858_provirus.2360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540307" accession="ERS11142273">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142273</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540307</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_provirus.522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.5088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_522_length_102177_cov_48.527767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_522_length_102177_cov_48.527767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0344636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540308" accession="ERS11142274">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142274</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540308</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_provirus.930</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.2597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_930_length_69961_cov_51.673788_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_930_length_69961_cov_51.673788_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540309" accession="ERS11142275">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142275</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540309</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.1490</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1490_length_51193_cov_7.384557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1490_length_51193_cov_7.384557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745858_virus.1490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540310" accession="ERS11142276">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142276</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540310</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.1787</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3385.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1787_length_44682_cov_1863.908082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1787_length_44682_cov_1863.908082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp003525905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745858_virus.1787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540311" accession="ERS11142277">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142277</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540311</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.2001</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2001_length_41147_cov_11.332968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745858_bin.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2001_length_41147_cov_11.332968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900549125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_virus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540313" accession="ERS11142279">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142279</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540313</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.3652</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.5458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_3652_length_26233_cov_4.050314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_3652_length_26233_cov_4.050314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME255646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900552475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.2039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540314" accession="ERS11142280">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142280</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540314</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.931</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.0011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_931_length_69954_cov_28.332670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4897959183673469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_931_length_69954_cov_28.332670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540315" accession="ERS11142281">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142281</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540315</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_provirus.1622</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1622_length_54415_cov_7.494093_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745859_bin.219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1622_length_54415_cov_7.494093_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540118" accession="ERS11142085">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142085</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540118</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745719_virus.5992</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gokushovirus WZ-2015a virus assembled from ERR7745719.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:46Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745719) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559922) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_5992_length_14643_cov_10.744817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_5_2492__NODE_5992_length_14643_cov_10.744817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.5992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; unclassified Gokushovirinae; Gokushovirus WZ-2015a</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540155" accession="ERS11142122">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142122</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540155</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745759_virus.6533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745759.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.6084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745759) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_6533_length_15873_cov_23.130031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_A_10_1110__NODE_6533_length_15873_cov_23.130031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.6533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540161" accession="ERS11142128">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142128</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540161</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745769_provirus.148</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745769.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.174534272590563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>190.738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745769) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567476) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_148_length_117735_cov_113.807042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745769_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_1_THA1067YZ__NODE_148_length_117735_cov_113.807042_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0332956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540169" accession="ERS11142135">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142135</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540169</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745777_virus.719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745777.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.3202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745777) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567368) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_719_length_35189_cov_13.350308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745777_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_19_THA0061JZ__NODE_719_length_35189_cov_13.350308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540175" accession="ERS11142141">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142141</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540175</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745779_virus.505</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>219.048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567330) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_505_length_73784_cov_123.910036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_17_RAU1014YZ__NODE_505_length_73784_cov_123.910036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745779_virus.505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540184" accession="ERS11142150">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142150</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540184</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745783_virus.2189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745783.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745783) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559670) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_2189_length_36059_cov_22.629037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_24_2330__NODE_2189_length_36059_cov_22.629037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738604_bin.142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738217_provirus.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540190" accession="ERS11142156">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142156</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540190</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_provirus.274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>165.508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_274_length_127889_cov_101.863557_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.40540540540540543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_274_length_127889_cov_101.863557_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745787_provirus.274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540197" accession="ERS11142163">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142163</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540197</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745787_virus.3943</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Cequinquevirus virus assembled from ERR7745787.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.4236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745787) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_3943_length_21493_cov_20.459703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_7_CHE0004BZ__NODE_3943_length_21493_cov_20.459703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738558_virus.1847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Cequinquevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540203" accession="ERS11142169">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142169</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540203</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_provirus.925</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:45Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_925_length_62489_cov_6.130616_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745815_bin.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_925_length_62489_cov_6.130616_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;s__TF01-11 sp003529475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_provirus.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540212" accession="ERS11142178">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142178</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540212</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745815_virus.805</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745815.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745815) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567746) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_805_length_67488_cov_7.991455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745815_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_22_1655__NODE_805_length_67488_cov_7.991455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745815_virus.805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540218" accession="ERS11142184">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142184</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540218</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745816_virus.1425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745816.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745816) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561185) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1425_length_40826_cov_15.143734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_G_11_1585__NODE_1425_length_40826_cov_15.143734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.1121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540226" accession="ERS11142192">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142192</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540226</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745819_virus.1097</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745819.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745819) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559689) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1097_length_51891_cov_12.279287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745819_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_20_2340__NODE_1097_length_51891_cov_12.279287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.1348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540232" accession="ERS11142198">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142198</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540232</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_provirus.20</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:44Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.5482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_20_length_364120_cov_17.950761_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745823_bin.344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_20_length_364120_cov_17.950761_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738240_bin.221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R sp900539325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540241" accession="ERS11142207">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142207</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540241</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745823_virus.40678</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745823.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.33039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745823) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559376) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_40678_length_4006_cov_4.111733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_18_2610__NODE_40678_length_4006_cov_4.111733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745823_virus.40678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540247" accession="ERS11142213">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142213</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540247</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745828_virus.1269</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745828.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2099118942731278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.6845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745828) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560377) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_1269_length_43058_cov_22.554640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_2_1814__NODE_1269_length_43058_cov_22.554640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.1269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540255" accession="ERS11142221">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142221</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540255</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745831_virus.40</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745831.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745831) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567123) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_40_length_199900_cov_6.113736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9642857142857144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_13_CHE0012SZ__NODE_40_length_199900_cov_6.113736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_virus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540261" accession="ERS11142227">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142227</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540261</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745835_virus.1424</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745835.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:43Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.4249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745835) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560711) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1424_length_44820_cov_22.124735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_9_1723__NODE_1424_length_44820_cov_22.124735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.1026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540270" accession="ERS11142236">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142236</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540270</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745836_virus.1272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745836.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.70954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745836) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567121) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_1272_length_32533_cov_4.021691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_9_CHE0010BZ__NODE_1272_length_32533_cov_4.021691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745836_virus.1272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540276" accession="ERS11142242">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142242</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540276</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_provirus.471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe provirus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_471_length_101148_cov_13.573280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745845_bin.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9836065573770492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_471_length_101148_cov_13.573280_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540284" accession="ERS11142250">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142250</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540284</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745845_virus.2990</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745845.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.2478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745845) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559413) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_2990_length_31854_cov_37.740315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_18_2626__NODE_2990_length_31854_cov_37.740315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745594_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__UBA636 sp900546285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_virus.2990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540290" accession="ERS11142256">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142256</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540290</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_provirus.11962</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. provirus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15966643118087134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.3563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_11962_length_11357_cov_3.505319_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_11962_length_11357_cov_3.505319_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>uncultured Microviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME022295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus sp900544435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_provirus.4601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540299" accession="ERS11142265">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142265</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540299</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745854_virus.247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745854.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745854) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567859) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_247_length_138006_cov_10.646782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5471698113207547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_10_1531__NODE_247_length_138006_cov_10.646782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0374893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540305" accession="ERS11142271">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142271</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540305</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_provirus.1485</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15462555066079303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.7395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1485_length_51335_cov_55.796832_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_1485_length_51335_cov_55.796832_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;s__Roseburia sp900552665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745858_provirus.1485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540312" accession="ERS11142278">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142278</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540312</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745858_virus.2529</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745858.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.827477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.4109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745858) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567238) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2529_length_34965_cov_29.473573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_H_19_RAJ0022YZ__NODE_2529_length_34965_cov_29.473573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540316" accession="ERS11142282">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142282</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540316</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_provirus.2477</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06941133531732865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.9723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2477_length_39651_cov_28.467302_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2477_length_39651_cov_28.467302_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0226686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540317" accession="ERS11142283">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142283</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540317</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_provirus.5018</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25194340863071135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.2624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_5018_length_23724_cov_37.115956_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_5018_length_23724_cov_37.115956_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540318" accession="ERS11142284">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142284</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540318</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_provirus.998</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.7935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_998_length_74974_cov_17.471875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745859_bin.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_998_length_74974_cov_17.471875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_provirus.776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540319" accession="ERS11142285">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142285</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540319</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.1264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.9583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1264_length_63924_cov_19.209391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9696969696969696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1264_length_63924_cov_19.209391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_virus.1264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540320" accession="ERS11142286">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142286</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540320</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.14343</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_14343_length_9981_cov_11.503534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_14343_length_9981_cov_11.503534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.10555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540321" accession="ERS11142287">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142287</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540321</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.1554</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.7445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1554_length_55962_cov_24.594936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_1554_length_55962_cov_24.594936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540322" accession="ERS11142288">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142288</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540322</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.18737</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_18737_length_7937_cov_5.784733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_18737_length_7937_cov_5.784733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738227_bin.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp004557855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_virus.18737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540323" accession="ERS11142289">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142289</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540323</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.2135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06347739359623317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.5601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2135_length_44017_cov_5.513314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2135_length_44017_cov_5.513314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME010079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1865;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.1020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540324" accession="ERS11142290">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142290</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540324</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.2459</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.57695538818076475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2459_length_39827_cov_9.366692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5925925925925926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2459_length_39827_cov_9.366692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_provirus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540325" accession="ERS11142291">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142291</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540325</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.2746</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2746_length_37269_cov_14.223677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_2746_length_37269_cov_14.223677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540326" accession="ERS11142292">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142292</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540326</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.314</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.9136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_314_length_135267_cov_8.810874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_314_length_135267_cov_8.810874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745859_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540327" accession="ERS11142293">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142293</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540327</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.3728</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>168.163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_3728_length_29777_cov_101.118889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_3728_length_29777_cov_101.118889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.2034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540328" accession="ERS11142294">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142294</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540328</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.4589</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_4589_length_25483_cov_11.249154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_4589_length_25483_cov_11.249154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738145_provirus.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540329" accession="ERS11142295">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142295</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540329</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.5318</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13727973568281945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:20Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_5318_length_22654_cov_9.352748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_5318_length_22654_cov_9.352748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738661_bin.249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.4692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540330" accession="ERS11142296">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142296</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540330</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.617</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr130_1 virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>291.748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_617_length_97742_cov_173.653909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7628865979381443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_617_length_97742_cov_173.653909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr130_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540331" accession="ERS11142297">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142297</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540331</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.7789</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.4556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_7789_length_16618_cov_27.447131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_7789_length_16618_cov_27.447131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.6784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540332" accession="ERS11142298">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142298</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540332</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745859_virus.9383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745859.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:42Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745859) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567976) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_9383_length_14199_cov_6.959992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_4_1166__NODE_9383_length_14199_cov_6.959992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.8602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540333" accession="ERS11142299">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142299</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540333</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_provirus.334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.26396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_334_length_92320_cov_5.199115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745867_bin.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_334_length_92320_cov_5.199115_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME233621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_D;s__Ruminococcus_D sp000434695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_provirus.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540334" accession="ERS11142300">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142300</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540334</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_provirus.612</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.852477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_612_length_67753_cov_16.263240_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_612_length_67753_cov_16.263240_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540335" accession="ERS11142301">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142301</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540335</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_virus.1213</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03995011113590989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_1213_length_43549_cov_5.885305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_1213_length_43549_cov_5.885305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738157_bin.149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.1038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540336" accession="ERS11142302">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142302</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540336</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_virus.1475</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_1475_length_37514_cov_12.272431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745867_bin.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_1475_length_37514_cov_12.272431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME006546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;s__CAG-605 sp000433255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745366_provirus.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540337" accession="ERS11142303">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142303</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540337</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_virus.2450</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12241189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.89861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_2450_length_24876_cov_4.343159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_2450_length_24876_cov_4.343159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__MARSEILLE-P3954;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745719_virus.1568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540338" accession="ERS11142304">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142304</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540338</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_virus.3368</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_3368_length_18610_cov_7.372363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745867_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_3368_length_18610_cov_7.372363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.5235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540339" accession="ERS11142305">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142305</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540339</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745867_virus.654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7745867.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745867) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559896) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_654_length_65458_cov_6.163411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745867_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_21_2489__NODE_654_length_65458_cov_6.163411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540340" accession="ERS11142306">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142306</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540340</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_provirus.1293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0254285490843079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.93921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1293_length_49140_cov_5.384526_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745870_bin.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1293_length_49140_cov_5.384526_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_provirus.1293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540341" accession="ERS11142307">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142307</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540341</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_provirus.2307</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2307_length_33131_cov_9.638682_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745870_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2307_length_33131_cov_9.638682_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-127;s__CAG-127 sp900319515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_provirus.2307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540342" accession="ERS11142308">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142308</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540342</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_provirus.51</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_51_length_241661_cov_12.071420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745870_bin.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_51_length_241661_cov_12.071420_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME067489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-217;s__CAG-217 sp000436335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_provirus.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540343" accession="ERS11142309">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142309</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540343</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_provirus.829</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00994313231849539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_829_length_63667_cov_6.533260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_829_length_63667_cov_6.533260_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900542795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540344" accession="ERS11142310">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142310</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540344</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.1164</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>301.341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1164_length_52246_cov_175.800782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1164_length_52246_cov_175.800782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540345" accession="ERS11142311">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142311</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540345</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.1469</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1469_length_45091_cov_14.390945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1469_length_45091_cov_14.390945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_virus.1469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540346" accession="ERS11142312">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142312</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540346</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.1624</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>671.847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1624_length_42273_cov_364.127642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1624_length_42273_cov_364.127642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_virus.1624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540347" accession="ERS11142313">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142313</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540347</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.1782</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vibrio phage 1.205.O._10N.222.51.A7 virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>90.0827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1782_length_39531_cov_53.541517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1782_length_39531_cov_53.541517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.1416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Vibrio phage 1.205.O._10N.222.51.A7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540348" accession="ERS11142314">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142314</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540348</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.1916</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>155.862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1916_length_37738_cov_91.881894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_1916_length_37738_cov_91.881894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745420_virus.171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540349" accession="ERS11142315">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142315</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540349</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.2081</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.0905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2081_length_35583_cov_32.233059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2081_length_35583_cov_32.233059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540350" accession="ERS11142316">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142316</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540350</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.2770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15387564972157636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.38836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2770_length_28920_cov_4.772319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_2770_length_28920_cov_4.772319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540351" accession="ERS11142317">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142317</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540351</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.3611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_3611_length_23812_cov_8.302549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_3611_length_23812_cov_8.302549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_virus.3611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540352" accession="ERS11142318">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142318</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540352</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.4565</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01358752575482346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_4565_length_20087_cov_6.677261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_4565_length_20087_cov_6.677261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738567_virus.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540353" accession="ERS11142319">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142319</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540353</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.642</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>95.8647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_642_length_73291_cov_52.882960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_642_length_73291_cov_52.882960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_virus.642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540354" accession="ERS11142320">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142320</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540354</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745870_virus.921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745870.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.1147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745870) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560649) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_921_length_59973_cov_6.490400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6829268292682927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_15_1703__NODE_921_length_59973_cov_6.490400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540355" accession="ERS11142321">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142321</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540355</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745874_provirus.47</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745874.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.1919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745874) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566850) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_47_length_115207_cov_36.976027_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745874_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_47_length_115207_cov_36.976027_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540356" accession="ERS11142322">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142322</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540356</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745874_virus.174</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745874.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.1281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745874) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566850) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_174_length_72699_cov_33.726570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_174_length_72699_cov_33.726570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745874_virus.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540357" accession="ERS11142323">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142323</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540357</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745874_virus.447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745874.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.32458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745874) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566850) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_447_length_44444_cov_4.380598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_447_length_44444_cov_4.380598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745414_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900768625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738447_virus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540358" accession="ERS11142324">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142324</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540358</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745874_virus.914</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745874.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15726046508259311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>131.664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745874) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566850) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_914_length_28452_cov_78.867736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_13_1546__NODE_914_length_28452_cov_78.867736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738199_virus.353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540359" accession="ERS11142325">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142325</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540359</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745876_provirus.94</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745876.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>989.588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745876) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560456) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_22_1919__NODE_94_length_94246_cov_567.890038_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745876_bin.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_22_1919__NODE_94_length_94246_cov_567.890038_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745876_provirus.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540360" accession="ERS11142326">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142326</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540360</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745876_virus.40</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745876.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1517.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745876) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560456) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_22_1919__NODE_40_length_152796_cov_876.808033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_22_1919__NODE_40_length_152796_cov_876.808033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides distasonis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745876_virus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540361" accession="ERS11142327">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142327</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540361</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745877_provirus.135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745877.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745877) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567266) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_135_length_107070_cov_14.077697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745877_bin.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_135_length_107070_cov_14.077697_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540362" accession="ERS11142328">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142328</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540362</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745877_provirus.9</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll provirus assembled from ERR7745877.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.0044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745877) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567266) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_9_length_349608_cov_34.055506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745877_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_9_length_349608_cov_34.055506_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745877_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540363" accession="ERS11142329">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142329</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540363</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745877_virus.144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7745877.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>293.331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745877) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567266) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_144_length_104763_cov_174.908603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6692913385826772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_144_length_104763_cov_174.908603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738572_virus.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540364" accession="ERS11142330">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142330</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540364</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745877_virus.538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7745877.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.6622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745877) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567266) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_538_length_54560_cov_41.261972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_538_length_54560_cov_41.261972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738574_virus.239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540365" accession="ERS11142331">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142331</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540365</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745877_virus.709</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7745877.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745877) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567266) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_709_length_45996_cov_68.345108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4090909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_J_3_RAU0015YZ__NODE_709_length_45996_cov_68.345108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745877_virus.709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540366" accession="ERS11142332">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142332</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540366</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_provirus.104</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.0323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_104_length_150221_cov_20.304155_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745881_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_104_length_150221_cov_20.304155_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-460;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745408_provirus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540367" accession="ERS11142333">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142333</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540367</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_provirus.391</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_391_length_78502_cov_11.360179_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745881_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_391_length_78502_cov_11.360179_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540368" accession="ERS11142334">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142334</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540368</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_virus.1022</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>143.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_1022_length_41010_cov_81.693401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_1022_length_41010_cov_81.693401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540369" accession="ERS11142335">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142335</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540369</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_virus.1506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_1506_length_30840_cov_8.902448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_1506_length_30840_cov_8.902448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-451;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745914_virus.1335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540370" accession="ERS11142336">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142336</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540370</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_virus.2318</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03912499170499385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.5208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_2318_length_22508_cov_32.990058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_2318_length_22508_cov_32.990058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_provirus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540371" accession="ERS11142337">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142337</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540371</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_virus.550</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>106.054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_550_length_63491_cov_60.197748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_550_length_63491_cov_60.197748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME192801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp000432135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745881_virus.550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540372" accession="ERS11142338">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142338</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540372</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745881_virus.928</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745881.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745881) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561357) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_928_length_44410_cov_6.973699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_A_5_1505__NODE_928_length_44410_cov_6.973699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__UBA1750;g__UBA7102;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540373" accession="ERS11142339">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142339</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540373</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745884_provirus.437</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus provirus assembled from ERR7745884.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.4824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745884) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567335) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_437_length_52778_cov_28.398854_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9310344827586208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_437_length_52778_cov_28.398854_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Enterobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745884_provirus.437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540374" accession="ERS11142340">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142340</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540374</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745884_virus.538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Dhillonvirus virus assembled from ERR7745884.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1086.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745884) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567335) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_538_length_44618_cov_616.822860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7903225806451613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_538_length_44618_cov_616.822860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745884_virus.538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Dhillonvirus; unclassified Dhillonvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540375" accession="ERS11142341">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142341</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540375</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745884_virus.931</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lederbergvirus virus assembled from ERR7745884.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>210.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745884) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567335) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_931_length_26947_cov_119.269483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.392156862745098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_F_21_THA0055JZ__NODE_931_length_26947_cov_119.269483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745884_virus.931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Lederbergvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540376" accession="ERS11142342">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142342</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540376</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745888_provirus.425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745888.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745888) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560445) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_425_length_38365_cov_12.798710_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8529411764705882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_425_length_38365_cov_12.798710_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745888_provirus.425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540377" accession="ERS11142343">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142343</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540377</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745888_virus.370</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phietavirus virus assembled from ERR7745888.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.9404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745888) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560445) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_370_length_41638_cov_55.629653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_370_length_41638_cov_55.629653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Weissella;s__Weissella confusa</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745888_virus.370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Azeredovirinae; Phietavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540378" accession="ERS11142344">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142344</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540378</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745888_virus.675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745888.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12890747351253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.9211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745888) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560445) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_675_length_26638_cov_63.420654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_4_1847__NODE_675_length_26638_cov_63.420654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Lactobacillaceae;g__Weissella;s__Weissella confusa</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745888_virus.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540379" accession="ERS11142345">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142345</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540379</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_provirus.1227</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>182.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1227_length_61302_cov_107.211482_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1227_length_61302_cov_107.211482_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540380" accession="ERS11142346">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142346</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540380</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_provirus.1996</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.4447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1996_length_45589_cov_42.393523_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745890_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1996_length_45589_cov_42.393523_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540381" accession="ERS11142347">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142347</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540381</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_provirus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08997797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.9624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_275_length_131465_cov_17.003455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745890_bin.403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_275_length_131465_cov_17.003455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540382" accession="ERS11142348">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142348</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540382</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_provirus.5</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540382</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>177.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_5_length_464791_cov_104.454279_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745890_bin.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_5_length_464791_cov_104.454279_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.3188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540383" accession="ERS11142349">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142349</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540383</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_provirus.90</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3636550413062912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.5151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_90_length_215247_cov_17.340554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745890_bin.241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_90_length_215247_cov_17.340554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Anaerotignaceae;g__Anaerotignum;s__Anaerotignum faecicola</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540384" accession="ERS11142350">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142350</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540384</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.1143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12578965839962564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1143_length_63883_cov_10.213632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1143_length_63883_cov_10.213632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.1218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540385" accession="ERS11142351">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142351</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540385</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.1618</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.8851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1618_length_51783_cov_11.547963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_1618_length_51783_cov_11.547963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_virus.261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540386" accession="ERS11142352">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142352</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540386</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.2163</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.0769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2163_length_43191_cov_13.764230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2163_length_43191_cov_13.764230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0331678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540387" accession="ERS11142353">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142353</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540387</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.2411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2411_length_39730_cov_7.206743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2411_length_39730_cov_7.206743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738537_virus.2114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540388" accession="ERS11142354">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142354</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540388</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.2652</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.2143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2652_length_37369_cov_24.613349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_2652_length_37369_cov_24.613349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738144_virus.294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540389" accession="ERS11142355">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142355</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540389</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.3112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.8575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_3112_length_33193_cov_30.983814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_3112_length_33193_cov_30.983814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0316936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540390" accession="ERS11142356">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142356</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540390</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.3815</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_3815_length_28612_cov_13.329876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_3815_length_28612_cov_13.329876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.3815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540391" accession="ERS11142357">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142357</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540391</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.435</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_435_length_107547_cov_17.097562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_435_length_107547_cov_17.097562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738537_virus.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540392" accession="ERS11142358">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142358</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540392</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.6454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_498_length_100390_cov_45.052685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.990990990990991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_498_length_100390_cov_45.052685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540393" accession="ERS11142359">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142359</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540393</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.604</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:41Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_604_length_90934_cov_34.167164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.30434782608695654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_604_length_90934_cov_34.167164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540394" accession="ERS11142360">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142360</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540394</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745890_virus.7150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7745890.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06574293217546884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.2191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745890) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560264) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_7150_length_17442_cov_18.574201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_F_18_2239__NODE_7150_length_17442_cov_18.574201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME027723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus_A;s__Coprococcus_A catus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.5217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540395" accession="ERS11142361">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142361</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540395</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745896_provirus.133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745896.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3203913925708577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.7501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745896) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562242) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_133_length_81856_cov_38.553920_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745896_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_133_length_81856_cov_38.553920_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>Siphoviridae environmental samples</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME138453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Lactococcus;s__Lactococcus lactis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745896_provirus.133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540396" accession="ERS11142362">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142362</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540396</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745896_virus.1103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lederbergvirus virus assembled from ERR7745896.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>123.613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745896) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562242) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_1103_length_21783_cov_70.250530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_1103_length_21783_cov_70.250530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745896_virus.1103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Lederbergvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540397" accession="ERS11142363">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142363</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540397</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745896_virus.508</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Actinomyces phage xhp1 virus assembled from ERR7745896.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.54622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745896) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562242) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_508_length_34801_cov_5.350190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_14_1634__NODE_508_length_34801_cov_5.350190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME207226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Actinomycetaceae;g__Pauljensenia;s__Pauljensenia sp001838165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745896_virus.508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Actinomyces phage xhp1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540398" accession="ERS11142364">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142364</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540398</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745897_provirus.180</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745897.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540398</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745897) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561483) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_180_length_88101_cov_8.423373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745897_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_180_length_88101_cov_8.423373_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738233_virus.443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540399" accession="ERS11142365">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142365</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540399</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745897_provirus.604</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745897.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.4333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745897) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561483) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_604_length_46195_cov_27.040483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_604_length_46195_cov_27.040483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738254_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540400" accession="ERS11142366">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142366</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540400</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745897_virus.1180</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745897.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.6122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745897) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561483) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_1180_length_30196_cov_4.998838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_1180_length_30196_cov_4.998838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540401" accession="ERS11142367">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142367</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540401</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745897_virus.3772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7745897.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.9069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745897) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561483) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_3772_length_12784_cov_43.576926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_3772_length_12784_cov_43.576926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745565_virus.3795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540402" accession="ERS11142368">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142368</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540402</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745897_virus.689</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745897.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745897) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561483) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_689_length_42654_cov_34.081100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_21_1514__NODE_689_length_42654_cov_34.081100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540403" accession="ERS11142369">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142369</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540403</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_provirus.1142</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18718388309858397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.87625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1142_length_43503_cov_6.055405_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1142_length_43503_cov_6.055405_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_provirus.298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540404" accession="ERS11142370">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142370</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540404</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_provirus.231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_231_length_99659_cov_9.788486_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8787878787878788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_231_length_99659_cov_9.788486_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_C</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540405" accession="ERS11142371">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142371</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540405</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_provirus.531</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_531_length_67730_cov_6.868047_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745903_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_531_length_67730_cov_6.868047_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540406" accession="ERS11142372">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142372</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540406</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_virus.1035</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9019553881807648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1035_length_45898_cov_14.457585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1035_length_45898_cov_14.457585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.1035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540407" accession="ERS11142373">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142373</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540407</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_virus.1381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1381_length_38567_cov_9.877579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_1381_length_38567_cov_9.877579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540408" accession="ERS11142374">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142374</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540408</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_virus.199</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_199_length_106835_cov_67.858587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_199_length_106835_cov_67.858587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540409" accession="ERS11142375">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142375</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540409</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_virus.330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_330_length_85399_cov_6.351551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9387755102040816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_330_length_85399_cov_6.351551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540410" accession="ERS11142376">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142376</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540410</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745903_virus.8215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745903.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745903) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561239) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_8215_length_9320_cov_7.071189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-02-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_4_1629__NODE_8215_length_9320_cov_7.071189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME089567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus sp001556435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_virus.8215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540411" accession="ERS11142377">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142377</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540411</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_provirus.1546</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.0977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1546_length_36422_cov_17.162828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1546_length_36422_cov_17.162828_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_provirus.1546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540412" accession="ERS11142378">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142378</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540412</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_provirus.392</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_392_length_83590_cov_16.216182_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_392_length_83590_cov_16.216182_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_provirus.392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540413" accession="ERS11142379">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142379</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540413</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_provirus.794</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.2264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_794_length_55217_cov_16.691567_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_794_length_55217_cov_16.691567_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900549125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_provirus.794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540414" accession="ERS11142380">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142380</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540414</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.1056</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1056_length_45871_cov_18.943202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1056_length_45871_cov_18.943202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.1056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540415" accession="ERS11142381">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142381</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540415</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.1237</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1237_length_41886_cov_8.388744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1237_length_41886_cov_8.388744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.1237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540416" accession="ERS11142382">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142382</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540416</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.1349</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1349_length_39543_cov_21.888917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1349_length_39543_cov_21.888917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.1349</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540417" accession="ERS11142383">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142383</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540417</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.1608</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1608_length_35585_cov_10.239129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_1608_length_35585_cov_10.239129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.1608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540418" accession="ERS11142384">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142384</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540418</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.23489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Inovirus sp. virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0024238520115018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_23489_length_5033_cov_8.836965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_23489_length_5033_cov_8.836965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.23489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Loebvirae; Hofneiviricota; Faserviricetes; Tubulavirales; Inoviridae; Inovirus; unclassified Inovirus; Inovirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540419" accession="ERS11142385">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142385</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540419</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.2853</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08920704845814986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.45944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_2853_length_24841_cov_4.749919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_2853_length_24841_cov_4.749919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_provirus.277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540420" accession="ERS11142386">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142386</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540420</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.3749</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan351 virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00270137614315428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_3749_length_20437_cov_6.949558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_3749_length_20437_cov_6.949558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME086861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Fusicatenibacter;s__Fusicatenibacter saccharivorans</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.3749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540421" accession="ERS11142387">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142387</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540421</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.5002</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gregsiragusavirus virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18368843659633977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_5002_length_16623_cov_5.752146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_5002_length_16623_cov_5.752146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.5002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Guelinviridae; Denniswatsonvirinae; Gregsiragusavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540422" accession="ERS11142388">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142388</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540422</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.684</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05680120315873836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_684_length_60622_cov_13.300074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.945945945945946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_684_length_60622_cov_13.300074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540423" accession="ERS11142389">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142389</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540423</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745906_virus.928</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745906.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10836869769395652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745906) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560435) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_928_length_50295_cov_7.206759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_12_1819__NODE_928_length_50295_cov_7.206759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME116282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-873;s__CAG-873 sp001701165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540424" accession="ERS11142390">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142390</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540424</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_provirus.1472</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1472_length_41277_cov_11.626214_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1472_length_41277_cov_11.626214_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0278621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540425" accession="ERS11142391">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142391</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540425</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_provirus.608</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15446967015660706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.9763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_608_length_80713_cov_38.919019_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745908_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_608_length_80713_cov_38.919019_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME181333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900540885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0314975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540426" accession="ERS11142392">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142392</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540426</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_virus.1183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.2116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1183_length_49421_cov_7.937155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6388888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1183_length_49421_cov_7.937155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745346_virus.2531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540427" accession="ERS11142393">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142393</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540427</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_virus.1546</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.74324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1546_length_39452_cov_5.355124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1546_length_39452_cov_5.355124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738560_virus.593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540428" accession="ERS11142394">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142394</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540428</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_virus.1906</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1906_length_32011_cov_7.474698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7083333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_1906_length_32011_cov_7.474698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745908_virus.1906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540429" accession="ERS11142395">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142395</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540429</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_virus.2674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16462237831743523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_2674_length_22816_cov_6.273187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745908_bin.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_2674_length_22816_cov_6.273187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Bacteroides;s__Bacteroides fragilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738272_provirus.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540430" accession="ERS11142396">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142396</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540430</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745908_virus.609</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745908.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.9508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745908) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567563) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_609_length_80647_cov_34.519995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6382978723404256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_D_5_THA1074JZ__NODE_609_length_80647_cov_34.519995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745908_virus.609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540431" accession="ERS11142397">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142397</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540431</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_provirus.11</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_11_length_330723_cov_9.929196_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745914_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_11_length_330723_cov_9.929196_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME039567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-632;s__CAG-632 sp900539185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_provirus.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540432" accession="ERS11142398">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142398</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540432</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_virus.1023</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8769553881807648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.5096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_1023_length_37423_cov_26.879532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_1023_length_37423_cov_26.879532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745914_virus.1023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540433" accession="ERS11142399">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142399</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540433</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_virus.1569</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540433</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.58515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_1569_length_27254_cov_4.849836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_1569_length_27254_cov_4.849836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0261276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540434" accession="ERS11142400">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142400</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540434</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_virus.2506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.8035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_2506_length_19225_cov_9.667276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_2506_length_19225_cov_9.667276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540435" accession="ERS11142401">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142401</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540435</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_virus.719</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>482.676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_719_length_46995_cov_283.022742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_719_length_46995_cov_283.022742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540436" accession="ERS11142402">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142402</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540436</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745914_virus.843</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745914.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745914) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560967) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_843_length_42231_cov_10.754733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_2_1553__NODE_843_length_42231_cov_10.754733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540437" accession="ERS11142403">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142403</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540437</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_provirus.1103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.7462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1103_length_51690_cov_21.516033_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1103_length_51690_cov_21.516033_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540438" accession="ERS11142404">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142404</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540438</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_provirus.321</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02549472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_321_length_104582_cov_12.479250_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_321_length_104582_cov_12.479250_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540439" accession="ERS11142405">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142405</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540439</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_provirus.613</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.26745594713656384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.1628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_613_length_73985_cov_42.771770_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9393939393939394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_613_length_73985_cov_42.771770_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738234_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_provirus.613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540440" accession="ERS11142406">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142406</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540440</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.1094</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1094_length_52052_cov_44.043098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1094_length_52052_cov_44.043098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540441" accession="ERS11142407">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142407</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540441</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.1453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>128.423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1453_length_42858_cov_71.654333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4166666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1453_length_42858_cov_71.654333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME020571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp002404795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.1453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540442" accession="ERS11142408">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142408</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540442</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.1653</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1653_length_39374_cov_7.810622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1653_length_39374_cov_7.810622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738271_virus.1435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540443" accession="ERS11142409">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142409</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540443</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.1768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1768_length_37780_cov_11.405034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_1768_length_37780_cov_11.405034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540444" accession="ERS11142410">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142410</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540444</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.2026</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.4247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_2026_length_34058_cov_25.067773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_2026_length_34058_cov_25.067773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540445" accession="ERS11142411">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142411</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540445</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.2723</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3616465191540744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.9974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_2723_length_27120_cov_28.300189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_2723_length_27120_cov_28.300189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738227_bin.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp004557855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.2723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540446" accession="ERS11142412">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142412</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540446</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.3768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_3768_length_20594_cov_8.127602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_3768_length_20594_cov_8.127602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.3768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540447" accession="ERS11142413">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142413</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540447</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.679</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.5628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_679_length_69852_cov_28.313579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8636363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_679_length_69852_cov_28.313579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745918_virus.679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540448" accession="ERS11142414">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142414</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540448</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745918_virus.970</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745918.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>160.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745918) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561744) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_970_length_56550_cov_94.704372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_9_1249__NODE_970_length_56550_cov_94.704372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.1267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540449" accession="ERS11142415">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142415</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540449</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_provirus.396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04234581497797356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.8203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_396_length_77562_cov_13.382164_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745919_bin.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_396_length_77562_cov_13.382164_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.2010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540450" accession="ERS11142416">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142416</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540450</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.1051</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_1051_length_37158_cov_6.775276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_1051_length_37158_cov_6.775276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME192801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp000432135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.1634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540451" accession="ERS11142417">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142417</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540451</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.1662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_1662_length_26215_cov_17.884919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_1662_length_26215_cov_17.884919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_virus.1662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540452" accession="ERS11142418">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142418</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540452</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.2444</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_2444_length_19191_cov_5.859527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_2444_length_19191_cov_5.859527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;s__Ruminiclostridium_E siraeum</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.2095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540453" accession="ERS11142419">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142419</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540453</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.509</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_509_length_65292_cov_12.157954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_509_length_65292_cov_12.157954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738611_provirus.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540454" accession="ERS11142420">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142420</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540454</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.784</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.5435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_784_length_47510_cov_42.172369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_784_length_47510_cov_42.172369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746349_bin.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Sphaerochaetales;f__Sphaerochaetaceae;g__UBA5920;s__UBA5920 sp002406055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_virus.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540455" accession="ERS11142421">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142421</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540455</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745919_virus.956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745919.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745919) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560944) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_956_length_40534_cov_14.496181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_7_1547__NODE_956_length_40534_cov_14.496181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.1795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540456" accession="ERS11142422">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142422</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540456</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_provirus.1937</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07305889139359004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.12906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_1937_length_30019_cov_5.355554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_1937_length_30019_cov_5.355554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola sp000434735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738160_provirus.796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540457" accession="ERS11142423">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142423</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540457</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_provirus.559</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_559_length_64390_cov_16.921151_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745922_bin.126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_559_length_64390_cov_16.921151_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540458" accession="ERS11142424">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142424</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540458</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.1549</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.91391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_1549_length_34857_cov_5.196521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745922_bin.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_1549_length_34857_cov_5.196521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.2132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540459" accession="ERS11142425">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142425</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540459</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540459</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>140.708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_204_length_103845_cov_83.050362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9117647058823528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_204_length_103845_cov_83.050362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737971_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900548745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540460" accession="ERS11142426">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142426</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540460</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.2448</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_2448_length_24907_cov_7.055658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_2448_length_24907_cov_7.055658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_virus.2448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540461" accession="ERS11142427">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142427</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540461</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.31</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_31_length_220447_cov_14.539298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5535714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_31_length_220447_cov_14.539298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540462" accession="ERS11142428">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142428</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540462</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.5260</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_5260_length_13011_cov_18.004661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_5260_length_13011_cov_18.004661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_virus.4192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540463" accession="ERS11142429">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142429</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540463</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745922_virus.986</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745922.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11194438650927264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745922) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559426) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_986_length_46724_cov_15.441679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_10_2644__NODE_986_length_46724_cov_15.441679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745389_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540464" accession="ERS11142430">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142430</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540464</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.1206</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1206_length_70579_cov_99.421208_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1206_length_70579_cov_99.421208_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0250587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540465" accession="ERS11142431">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142431</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540465</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.1649</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>308.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1649_length_58097_cov_176.062634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745924_bin.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1649_length_58097_cov_176.062634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.1649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540466" accession="ERS11142432">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142432</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540466</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.2264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2264_length_47720_cov_10.626640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745924_bin.435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2264_length_47720_cov_10.626640_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738600_bin.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__RUG14107;s__RUG14107 sp900772685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_provirus.624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540467" accession="ERS11142433">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142433</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540467</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.3189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3189_length_37261_cov_9.113651_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745924_bin.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4705882352941176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3189_length_37261_cov_9.113651_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.3189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540468" accession="ERS11142434">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142434</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540468</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.456</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:40Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.2824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_456_length_124164_cov_22.526622_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3157894736842105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_456_length_124164_cov_22.526622_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540469" accession="ERS11142435">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142435</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540469</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.784</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.6624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_784_length_90490_cov_11.682048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9210526315789472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_784_length_90490_cov_11.682048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745924_bin.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540470" accession="ERS11142436">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142436</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540470</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_provirus.92</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_92_length_242120_cov_8.312589_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745924_bin.226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8108108108108109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_92_length_242120_cov_8.312589_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_provirus.1478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540471" accession="ERS11142437">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142437</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540471</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.13595</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Astrithrvirus virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06876689984850125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.1471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_13595_length_11466_cov_50.306419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_13595_length_11466_cov_50.306419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.9313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540472" accession="ERS11142438">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142438</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540472</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.1805</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.1483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1805_length_55184_cov_29.984920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_1805_length_55184_cov_29.984920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.1805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540473" accession="ERS11142439">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142439</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540473</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.2221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2221_length_48324_cov_12.316413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2221_length_48324_cov_12.316413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME255303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA9506;g__UMGS755;s__UMGS755 sp900545185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540474" accession="ERS11142440">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142440</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540474</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.2727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3093893088856105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2727_length_41728_cov_5.546421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_2727_length_41728_cov_5.546421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Romboutsia;s__Romboutsia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.2727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540475" accession="ERS11142441">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142441</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540475</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.3018</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.58178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3018_length_38895_cov_5.401051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3018_length_38895_cov_5.401051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0319855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540476" accession="ERS11142442">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142442</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540476</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.3464</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09375054236578688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.13485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3464_length_35064_cov_5.119273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_3464_length_35064_cov_5.119273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.3464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540477" accession="ERS11142443">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142443</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540477</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.4003</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13997797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_4003_length_31391_cov_9.617392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_4003_length_31391_cov_9.617392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540478" accession="ERS11142444">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142444</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540478</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.4390</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_4390_length_29107_cov_11.158836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8620689655172413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_4390_length_29107_cov_11.158836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.4390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540479" accession="ERS11142445">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142445</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540479</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.57</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_57_length_296491_cov_31.152968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.49122807017543857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_57_length_296491_cov_31.152968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540480" accession="ERS11142446">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142446</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540480</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.6600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11938325991189436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540480</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.0009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_6600_length_21142_cov_16.957845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_6600_length_21142_cov_16.957845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides sp900549585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.6600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540481" accession="ERS11142447">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142447</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540481</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745924_virus.754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745924.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745924) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567885) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_754_length_92680_cov_16.627161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_24_1660__NODE_754_length_92680_cov_16.627161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540482" accession="ERS11142448">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142448</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540482</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_provirus.1045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.0148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1045_length_37508_cov_5.436643_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745937_bin.130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1045_length_37508_cov_5.436643_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_provirus.1045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540483" accession="ERS11142449">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142449</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540483</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_provirus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.3855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_275_length_82133_cov_19.460637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745937_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_275_length_82133_cov_19.460637_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_provirus.428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540484" accession="ERS11142450">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142450</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540484</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_provirus.768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.8182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_768_length_45993_cov_29.121287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745937_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_768_length_45993_cov_29.121287_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME118160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900552515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_provirus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540485" accession="ERS11142451">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142451</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540485</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.1255</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1255_length_33215_cov_6.913362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1255_length_33215_cov_6.913362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_virus.1255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540486" accession="ERS11142452">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142452</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540486</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.1533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.52297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1533_length_29163_cov_4.966444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48484848484848486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_1533_length_29163_cov_4.966444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0373039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540487" accession="ERS11142453">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142453</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540487</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.209</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_209_length_95368_cov_8.222728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6296296296296297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_209_length_95368_cov_8.222728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_virus.209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540488" accession="ERS11142454">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142454</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540488</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.2687</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_2687_length_19393_cov_12.324550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_2687_length_19393_cov_12.324550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738581_bin.271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.3018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540489" accession="ERS11142455">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142455</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540489</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0932089863334723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540489</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.72347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_411_length_66482_cov_5.624772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_411_length_66482_cov_5.624772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900545925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738180_virus.139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540490" accession="ERS11142456">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142456</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540490</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.539</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540490</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_539_length_57533_cov_7.599641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_539_length_57533_cov_7.599641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__P3;g__UBA3388;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746233_virus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540491" accession="ERS11142457">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142457</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540491</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745937_virus.849</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745937.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540491</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.8789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745937) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561471) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_849_length_42510_cov_21.474112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_23_1513__NODE_849_length_42510_cov_21.474112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745937_virus.849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540492" accession="ERS11142458">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142458</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540492</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_provirus.1203</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1203_length_67565_cov_7.502949_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1203_length_67565_cov_7.502949_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_provirus.1203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540493" accession="ERS11142459">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142459</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540493</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_provirus.198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_198_length_164666_cov_17.009253_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745947_bin.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.951219512195122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_198_length_164666_cov_17.009253_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540494" accession="ERS11142460">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142460</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540494</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_provirus.293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.5703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_293_length_141518_cov_8.942061_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745947_bin.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_293_length_141518_cov_8.942061_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Verrucomicrobiota;c__Kiritimatiellae;o__RFP12;f__UBA1067;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_provirus.293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540495" accession="ERS11142461">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142461</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540495</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_provirus.772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09482695118618886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_772_length_88209_cov_8.410044_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745947_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_772_length_88209_cov_8.410044_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540496" accession="ERS11142462">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142462</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540496</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.1221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1221_length_67010_cov_6.351635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1221_length_67010_cov_6.351635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.1221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540497" accession="ERS11142463">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142463</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540497</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.1411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.4905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1411_length_61462_cov_31.008210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8611111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1411_length_61462_cov_31.008210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540498" accession="ERS11142464">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142464</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540498</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.1733</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1733_length_53856_cov_25.548467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1733_length_53856_cov_25.548467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7803603_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.1733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540499" accession="ERS11142465">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142465</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540499</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.1950</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540499</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.8743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1950_length_49625_cov_23.113890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5272727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_1950_length_49625_cov_23.113890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0328330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540500" accession="ERS11142466">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142466</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540500</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.21933</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_21933_length_6562_cov_7.395220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_21933_length_6562_cov_7.395220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME272760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000432515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.17006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540501" accession="ERS11142467">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142467</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540501</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.2322</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.3865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2322_length_44386_cov_26.394705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2322_length_44386_cov_26.394705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.2030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540502" accession="ERS11142468">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142468</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540502</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.2518</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4629947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6120.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2518_length_42218_cov_3386.104744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2518_length_42218_cov_3386.104744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0280262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540503" accession="ERS11142469">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142469</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540503</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.2698</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>67.5134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2698_length_40135_cov_40.210395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2698_length_40135_cov_40.210395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.1206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540504" accession="ERS11142470">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142470</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540504</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.2835</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.02296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2835_length_38776_cov_3.799607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_2835_length_38776_cov_3.799607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738565_virus.855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540505" accession="ERS11142471">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142471</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540505</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.3099</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.9507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3099_length_36009_cov_43.589669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3099_length_36009_cov_43.589669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME116287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;s__UBA10281 sp900767815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.1534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540506" accession="ERS11142472">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142472</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540506</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.8801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_330_length_136244_cov_18.078169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7111111111111111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_330_length_136244_cov_18.078169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540507" accession="ERS11142473">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142473</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540507</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.3506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3506_length_32835_cov_13.463673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7297297297297297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3506_length_32835_cov_13.463673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0335534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540508" accession="ERS11142474">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142474</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540508</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.3857</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>616.679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3857_length_30419_cov_357.328851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_3857_length_30419_cov_357.328851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.3857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540509" accession="ERS11142475">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142475</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540509</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.4797</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13111098844285518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_4797_length_25309_cov_9.863705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745947_bin.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_4797_length_25309_cov_9.863705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales;f__Monoglobaceae;g__UMGS1820;s__UMGS1820 sp900545865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.4797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540510" accession="ERS11142476">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142476</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540510</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.558</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade sp. virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.6565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_558_length_104403_cov_19.165615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_558_length_104403_cov_19.165615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; UAG-readthrough crAss clade sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540511" accession="ERS11142477">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142477</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540511</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.6227</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_6227_length_20138_cov_26.923234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_6227_length_20138_cov_26.923234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.3351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540512" accession="ERS11142478">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142478</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540512</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.7715</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_7715_length_16646_cov_7.816344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_7715_length_16646_cov_7.816344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.4069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540513" accession="ERS11142479">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142479</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540513</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745947_virus.969</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745947.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>395.792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745947) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567962) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_969_length_76576_cov_224.019020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_20_1180__NODE_969_length_76576_cov_224.019020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540514" accession="ERS11142480">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142480</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540514</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_provirus.32</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_32_length_218761_cov_11.056579_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745948_bin.177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_32_length_218761_cov_11.056579_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0349911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540515" accession="ERS11142481">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142481</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540515</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_provirus.685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.6194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_685_length_59024_cov_30.376932_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745948_bin.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_685_length_59024_cov_30.376932_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745382_virus.1139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540516" accession="ERS11142482">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142482</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540516</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.1382</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09853065807331889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.2975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_1382_length_37973_cov_16.518340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_1382_length_37973_cov_16.518340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_virus.390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540517" accession="ERS11142483">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142483</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540517</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.1734</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_1734_length_32740_cov_6.935401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_1734_length_32740_cov_6.935401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745760_bin.144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__CAG-238;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540518" accession="ERS11142484">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142484</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540518</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.2200</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_2200_length_27125_cov_7.565033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_2200_length_27125_cov_7.565033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0307965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540519" accession="ERS11142485">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142485</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540519</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.291</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1534374812512915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>365.372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_291_length_91927_cov_231.427022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9574468085106383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_291_length_91927_cov_231.427022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540520" accession="ERS11142486">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142486</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540520</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.5247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.1024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_5247_length_13011_cov_17.334199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_5247_length_13011_cov_17.334199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_virus.4192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540521" accession="ERS11142487">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142487</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540521</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745948_virus.945</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745948.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745948) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559389) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_945_length_48607_cov_10.014692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745948_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.78125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_18_2640__NODE_945_length_48607_cov_10.014692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540522" accession="ERS11142488">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142488</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540522</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_provirus.1732</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae provirus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.1477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1732_length_48219_cov_14.422666_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1732_length_48219_cov_14.422666_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A succinatutens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_provirus.1797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540523" accession="ERS11142489">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142489</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540523</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_provirus.2727</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07637530562347188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.5555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2727_length_33823_cov_12.146032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2727_length_33823_cov_12.146032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_provirus.2727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540524" accession="ERS11142490">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142490</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540524</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_provirus.640</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11745594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_640_length_95677_cov_17.035356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_640_length_95677_cov_17.035356_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_provirus.640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540525" accession="ERS11142491">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142491</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540525</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.1014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02597739359623315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1014_length_71226_cov_9.136558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1014_length_71226_cov_9.136558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540526" accession="ERS11142492">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142492</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540526</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.1359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1359_length_57503_cov_7.405147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1359_length_57503_cov_7.405147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738627_virus.666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540527" accession="ERS11142493">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142493</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540527</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.1824</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1824_length_46302_cov_33.124565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_1824_length_46302_cov_33.124565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540528" accession="ERS11142494">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142494</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540528</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.2000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2000_length_43229_cov_14.665044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2000_length_43229_cov_14.665044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.2000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540529" accession="ERS11142495">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142495</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540529</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.2246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:39Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.2027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2246_length_39299_cov_6.781704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2246_length_39299_cov_6.781704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.2054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540530" accession="ERS11142496">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142496</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540530</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.2504</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.9854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2504_length_36378_cov_22.595714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6086956521739131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2504_length_36378_cov_22.595714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738568_bin.326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__SFEL01;s__SFEL01 sp004557245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_provirus.614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540531" accession="ERS11142497">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142497</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540531</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.2793</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.5073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2793_length_33179_cov_22.517340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_2793_length_33179_cov_22.517340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0365260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540532" accession="ERS11142498">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142498</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540532</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.3097</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.40773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_3097_length_30248_cov_3.853899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_3097_length_30248_cov_3.853899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746345_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-460;s__CAG-460 sp000437315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.3097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540533" accession="ERS11142499">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142499</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540533</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.35024</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.11528</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_35024_length_3377_cov_2.813333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_35024_length_3377_cov_2.813333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.35024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540534" accession="ERS11142500">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142500</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540534</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.4658</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.12887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_4658_length_21203_cov_4.513443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_4658_length_21203_cov_4.513443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.1931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540535" accession="ERS11142501">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142501</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540535</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.6366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.54331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_6366_length_15896_cov_4.093053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_6366_length_15896_cov_4.093053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745951_virus.6366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540536" accession="ERS11142502">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142502</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540536</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745951_virus.908</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745951.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11745594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.55803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745951) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561784) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_908_length_76866_cov_5.227910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_8_1096__NODE_908_length_76866_cov_5.227910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__ML615J-28;f__CAG-698;g__CAG-698;s__CAG-698 sp000431235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747128_virus.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540537" accession="ERS11142503">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142503</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540537</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745953_provirus.339</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_339_length_84981_cov_8.729706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745953_bin.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_339_length_84981_cov_8.729706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738623_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.1368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540538" accession="ERS11142504">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142504</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540538</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745953_provirus.945</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses provirus assembled from ERR7745953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01745594713656387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.2495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_945_length_44343_cov_15.735937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_945_length_44343_cov_15.735937_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_provirus.835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540539" accession="ERS11142505">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142505</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540539</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745953_virus.1327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.3683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_1327_length_35574_cov_28.594219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_1327_length_35574_cov_28.594219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.1155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540540" accession="ERS11142506">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142506</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540540</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745953_virus.2609</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Picovirinae virus assembled from ERR7745953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0847458242767311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540540</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_2609_length_21342_cov_5.906701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_2609_length_21342_cov_5.906701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.5756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540541" accession="ERS11142507">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142507</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540541</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745953_virus.547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745953.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745953) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561484) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_547_length_63777_cov_9.414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9487179487179488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_9_1520__NODE_547_length_63777_cov_9.414097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_virus.423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540542" accession="ERS11142508">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142508</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540542</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_provirus.108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>208.684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_108_length_193356_cov_125.053669_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745963_bin.406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_108_length_193356_cov_125.053669_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738626_bin.272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Methanobacteriota;c__Methanobacteria;o__Methanobacteriales;f__Methanobacteriaceae;g__Methanobrevibacter_A;s__Methanobrevibacter_A smithii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745613_provirus.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540543" accession="ERS11142509">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142509</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540543</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_provirus.1801</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1801_length_46772_cov_10.909391_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7586206896551724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1801_length_46772_cov_10.909391_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0272411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540544" accession="ERS11142510">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142510</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540544</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_provirus.330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_330_length_122091_cov_20.701993_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745963_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_330_length_122091_cov_20.701993_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME266586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-180;s__CAG-180 sp000432435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_provirus.859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540545" accession="ERS11142511">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142511</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540545</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_provirus.762</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.2924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_762_length_77163_cov_12.679358_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745963_bin.133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_762_length_77163_cov_12.679358_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_provirus.762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540546" accession="ERS11142512">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142512</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540546</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.1297</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.7629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1297_length_57340_cov_24.290135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745963_bin.171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6216216216216216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1297_length_57340_cov_24.290135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.1297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540547" accession="ERS11142513">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142513</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540547</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.1562</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1562_length_51154_cov_10.055230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1562_length_51154_cov_10.055230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.1562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540548" accession="ERS11142514">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142514</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540548</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.1836</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1836_length_46129_cov_5.951880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_1836_length_46129_cov_5.951880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.1836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540549" accession="ERS11142515">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142515</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540549</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.2028</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>209.051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2028_length_43384_cov_126.294756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2028_length_43384_cov_126.294756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738150_virus.405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540550" accession="ERS11142516">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142516</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540550</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_219_length_146897_cov_8.378048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_219_length_146897_cov_8.378048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540551" accession="ERS11142517">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142517</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540551</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.2383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02241189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2383_length_38766_cov_12.827548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2383_length_38766_cov_12.827548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.1195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540552" accession="ERS11142518">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142518</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540552</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.2673</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.0071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2673_length_35912_cov_9.720050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2673_length_35912_cov_9.720050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745963_virus.2673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540553" accession="ERS11142519">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142519</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540553</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.2946</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08869765791341377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2946_length_33596_cov_11.533309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4516129032258064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_2946_length_33596_cov_11.533309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540554" accession="ERS11142520">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142520</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540554</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.3540</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.7601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_3540_length_29458_cov_3.837684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_3540_length_29458_cov_3.837684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_provirus.514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540555" accession="ERS11142521">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142521</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540555</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.442</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.8377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_442_length_104144_cov_13.116156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9078947368421052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_442_length_104144_cov_13.116156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540556" accession="ERS11142522">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142522</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540556</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.5409</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.63722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_5409_length_21535_cov_3.838895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_5409_length_21535_cov_3.838895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.1803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540557" accession="ERS11142523">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142523</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540557</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745963_virus.78497</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745963.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>69.6954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745963) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567841) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_78497_length_2550_cov_44.353013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_6_2233__NODE_78497_length_2550_cov_44.353013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.13523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540558" accession="ERS11142524">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142524</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540558</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745967_provirus.24</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses provirus assembled from ERR7745967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.7773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_24_length_176691_cov_47.699780_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_24_length_176691_cov_47.699780_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738657_provirus.203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540559" accession="ERS11142525">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142525</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540559</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745967_provirus.673</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>260.492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_673_length_42890_cov_153.482610_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_673_length_42890_cov_153.482610_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_provirus.936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540560" accession="ERS11142526">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142526</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540560</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745967_virus.1669</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08312057094440387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_1669_length_22926_cov_12.342991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_1669_length_22926_cov_12.342991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540561" accession="ERS11142527">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142527</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540561</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745967_virus.342</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>180.081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_342_length_61887_cov_103.745996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35294117647058826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_342_length_61887_cov_103.745996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745967_virus.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540562" accession="ERS11142528">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142528</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540562</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745967_virus.662</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745967.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745967) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560928) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_662_length_43357_cov_6.635305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_E_3_1554__NODE_662_length_43357_cov_6.635305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540563" accession="ERS11142529">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142529</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540563</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_provirus.336</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_336_length_81211_cov_12.470210_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_336_length_81211_cov_12.470210_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540564" accession="ERS11142530">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142530</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540564</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_virus.11700</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.0206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_11700_length_5402_cov_25.069484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_11700_length_5402_cov_25.069484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746757_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;s__UBA1436 sp900541355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.11700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540565" accession="ERS11142531">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142531</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540565</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_virus.1468</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.29701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_1468_length_32371_cov_3.386357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_1468_length_32371_cov_3.386357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-791;s__CAG-791 sp000431495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540566" accession="ERS11142532">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142532</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540566</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_virus.3045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.58111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_3045_length_17200_cov_5.561350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_3045_length_17200_cov_5.561350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.3045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540567" accession="ERS11142533">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142533</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540567</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_virus.577</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.70788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_577_length_59820_cov_5.670271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_577_length_59820_cov_5.670271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738568_virus.1165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540568" accession="ERS11142534">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142534</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540568</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745970_virus.853</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745970.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745970) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559772) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_853_length_47185_cov_7.747071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_22_2354__NODE_853_length_47185_cov_7.747071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738173_bin.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFFH01;s__SFFH01 sp900542445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745970_virus.853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540569" accession="ERS11142535">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142535</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540569</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.1167</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>237.686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_1167_length_64025_cov_138.506208_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_1167_length_64025_cov_138.506208_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540570" accession="ERS11142536">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142536</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540570</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.148</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.8597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_148_length_181911_cov_7.731233_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745975_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_148_length_181911_cov_7.731233_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-317;s__CAG-317 sp000433215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_provirus.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540571" accession="ERS11142537">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142537</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540571</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.2143</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>184.635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2143_length_44168_cov_116.751650_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2143_length_44168_cov_116.751650_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540572" accession="ERS11142538">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142538</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540572</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.3568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_272_length_138457_cov_43.388394_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_272_length_138457_cov_43.388394_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540573" accession="ERS11142539">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142539</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540573</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.42</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.0302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_42_length_292727_cov_47.094639_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5616438356164384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_42_length_292727_cov_47.094639_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540574" accession="ERS11142540">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142540</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540574</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_provirus.769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.2188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_769_length_81474_cov_31.804170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745975_bin.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_769_length_81474_cov_31.804170_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540575" accession="ERS11142541">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142541</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540575</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.11148</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_11148_length_12929_cov_8.261671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_11148_length_12929_cov_8.261671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.9454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540576" accession="ERS11142542">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142542</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540576</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08243392070484583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>366.074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_145_length_183584_cov_210.504591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5849056603773585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_145_length_183584_cov_210.504591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540577" accession="ERS11142543">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142543</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540577</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.1868</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.8935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_1868_length_48558_cov_21.462614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_1868_length_48558_cov_21.462614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.1868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540578" accession="ERS11142544">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142544</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540578</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.2104</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2104_length_44838_cov_8.050446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2104_length_44838_cov_8.050446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.1695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540579" accession="ERS11142545">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142545</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540579</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.2340</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:37Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2340_length_41953_cov_11.341699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2340_length_41953_cov_11.341699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738561_bin.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UBA1191;s__UBA1191 sp900066305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.2340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540580" accession="ERS11142546">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142546</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540580</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.2552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0498</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2552_length_39883_cov_12.032684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2552_length_39883_cov_12.032684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540581" accession="ERS11142547">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142547</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540581</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.2745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2745_length_38116_cov_17.280659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8214285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_2745_length_38116_cov_17.280659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.2745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540582" accession="ERS11142548">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142548</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540582</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.3080</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18548688392358637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.7536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3080_length_35518_cov_16.346745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3080_length_35518_cov_16.346745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.2054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540583" accession="ERS11142549">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142549</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540583</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.3424</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.8749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3424_length_32966_cov_40.123962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3424_length_32966_cov_40.123962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540584" accession="ERS11142550">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142550</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540584</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.3777</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>614.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3777_length_30956_cov_355.853817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_3777_length_30956_cov_355.853817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745559_virus.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540585" accession="ERS11142551">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142551</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540585</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_429_length_110210_cov_20.030291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6792452830188679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_429_length_110210_cov_20.030291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Eubacterium_R;s__Eubacterium_R sp000436835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540586" accession="ERS11142552">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142552</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540586</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.5628</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06452230274393414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.81198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_5628_length_22743_cov_4.970529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745975_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_5628_length_22743_cov_4.970529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.1099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540587" accession="ERS11142553">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142553</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540587</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.6406</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_6406_length_20543_cov_22.063129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745975_bin.302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_6406_length_20543_cov_22.063129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.5792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540588" accession="ERS11142554">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142554</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540588</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745975_virus.851</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745975.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540588</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.0181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745975) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567797) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_851_length_76690_cov_33.053607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_18_1662__NODE_851_length_76690_cov_33.053607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746051_bin.216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger variabilis_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540589" accession="ERS11142555">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142555</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540589</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_provirus.94</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.5207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_94_length_151936_cov_51.376757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745985_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_94_length_151936_cov_51.376757_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0350419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540590" accession="ERS11142556">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142556</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540590</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_virus.1198</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_1198_length_34569_cov_20.985910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_1198_length_34569_cov_20.985910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745985_virus.1198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540591" accession="ERS11142557">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142557</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540591</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_virus.189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>158.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_189_length_108518_cov_96.018268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8235294117647058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_189_length_108518_cov_96.018268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738265_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp900540255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738472_virus.372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540592" accession="ERS11142558">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142558</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540592</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_virus.3618</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03714174109185703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_3618_length_13674_cov_8.972126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_3618_length_13674_cov_8.972126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745985_virus.3618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540593" accession="ERS11142559">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142559</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540593</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_virus.606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bordetella virus BPP1 virus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.0334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_606_length_56104_cov_29.052261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745985_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43478260869565216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_606_length_56104_cov_29.052261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Duodenibacillus;s__Duodenibacillus sp900544255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745985_virus.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Rauchvirus; Bordetella virus BPP1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540594" accession="ERS11142560">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142560</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540594</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745985_virus.811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Clostridium phage phi8074-B1 virus assembled from ERR7745985.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>239.731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745985) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567256) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_811_length_46449_cov_146.352411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Nepal</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>28.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>84.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1371/journal.pbio.2005396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2787378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Nepal_MoBio_Fiber-Hadza-Nepal_B_1_RAJ1011YZ__NODE_811_length_46449_cov_146.352411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Romboutsia;s__Romboutsia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745985_virus.811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Clostridium phage phi8074-B1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540595" accession="ERS11142561">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142561</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540595</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_provirus.1210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.1182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1210_length_37194_cov_10.255220_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8709677419354839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1210_length_37194_cov_10.255220_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_provirus.1478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540596" accession="ERS11142562">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142562</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540596</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_provirus.433</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.98106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_433_length_69058_cov_5.374060_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_433_length_69058_cov_5.374060_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745414_provirus.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540597" accession="ERS11142563">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142563</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540597</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_virus.1012</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1012_length_40946_cov_9.506790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745989_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1012_length_40946_cov_9.506790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_virus.1703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540598" accession="ERS11142564">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142564</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540598</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_virus.1331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540598</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.8853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1331_length_34951_cov_7.108276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1331_length_34951_cov_7.108276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp001916075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.1331</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540599" accession="ERS11142565">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142565</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540599</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_virus.1937</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.6378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1937_length_27710_cov_23.217855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745989_bin.204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_1937_length_27710_cov_23.217855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540600" accession="ERS11142566">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142566</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540600</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_virus.3352</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.89571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_3352_length_18828_cov_4.231188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_3352_length_18828_cov_4.231188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540601" accession="ERS11142567">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142567</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540601</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745989_virus.628</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745989.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745989) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566954) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_628_length_55221_cov_7.154795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_F_3_1566__NODE_628_length_55221_cov_7.154795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540602" accession="ERS11142568">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142568</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540602</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_provirus.1036</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09471365638766524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>233.244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1036_length_73573_cov_140.019919_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1036_length_73573_cov_140.019919_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME023798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900543155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.1656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540603" accession="ERS11142569">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142569</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540603</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_provirus.1559</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.9127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1559_length_58071_cov_16.748284_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1559_length_58071_cov_16.748284_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_provirus.1559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540604" accession="ERS11142570">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142570</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540604</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_provirus.217</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_217_length_156139_cov_6.978547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745990_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_217_length_156139_cov_6.978547_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_provirus.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540605" accession="ERS11142571">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142571</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540605</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_provirus.394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.2577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_394_length_125162_cov_15.233649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745990_bin.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_394_length_125162_cov_15.233649_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_provirus.979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540606" accession="ERS11142572">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142572</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540606</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_provirus.766</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>88.9717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_766_length_88459_cov_51.632832_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745990_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_766_length_88459_cov_51.632832_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540607" accession="ERS11142573">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142573</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540607</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.1236</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.8181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1236_length_66342_cov_35.861345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1236_length_66342_cov_35.861345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__UBA1232;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.1236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540608" accession="ERS11142574">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142574</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540608</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.1400</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.84226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1400_length_61631_cov_5.003932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6410256410256411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1400_length_61631_cov_5.003932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540609" accession="ERS11142575">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142575</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540609</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.160</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540609</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>263.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_160_length_178634_cov_151.544314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_160_length_178634_cov_151.544314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737970_virus.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540610" accession="ERS11142576">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142576</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540610</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.1950</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540610</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.9771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1950_length_50434_cov_22.038942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4642857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_1950_length_50434_cov_22.038942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp003491505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_virus.1551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540611" accession="ERS11142577">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142577</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540611</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.2192</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2192_length_46775_cov_15.132896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745990_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2192_length_46775_cov_15.132896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_virus.1099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540612" accession="ERS11142578">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142578</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540612</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.2508</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2508_length_42383_cov_12.856025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2508_length_42383_cov_12.856025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.2508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540613" accession="ERS11142579">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142579</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540613</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.2928</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.6297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2928_length_38122_cov_5.595689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_2928_length_38122_cov_5.595689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.2928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540614" accession="ERS11142580">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142580</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540614</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.3151</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_3151_length_36021_cov_19.675968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_3151_length_36021_cov_19.675968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME284285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-194;s__CAG-194 sp000432915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745845_virus.2255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540615" accession="ERS11142581">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142581</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540615</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.3758</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>187.293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_3758_length_31196_cov_108.879109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_3758_length_31196_cov_108.879109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_virus.631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540616" accession="ERS11142582">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142582</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540616</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.4658</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07720665318800916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_4658_length_25986_cov_6.344784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745990_bin.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_4658_length_25986_cov_6.344784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540617" accession="ERS11142583">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142583</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540617</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.5424</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1264047273878133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.34553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_5424_length_22773_cov_4.683909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_5424_length_22773_cov_4.683909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME269197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__KLE1615;s__KLE1615 sp900066985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.5424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540618" accession="ERS11142584">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142584</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540618</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.655</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_655_length_96473_cov_7.843074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_655_length_96473_cov_7.843074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745990_virus.655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540619" accession="ERS11142585">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142585</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540619</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745990_virus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7745990.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745990) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559405) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_80_length_247842_cov_8.147164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9210526315789472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_14_2624__NODE_80_length_247842_cov_8.147164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1936415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540620" accession="ERS11142586">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142586</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540620</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_provirus.1519</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Psavirus provirus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.130873179909068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540620</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1519_length_48696_cov_6.079845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745994_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1519_length_48696_cov_6.079845_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_provirus.1519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Psavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540621" accession="ERS11142587">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142587</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540621</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_provirus.2648</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_2648_length_32474_cov_12.307251_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_2648_length_32474_cov_12.307251_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_provirus.1783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540622" accession="ERS11142588">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142588</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540622</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_provirus.611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_611_length_88277_cov_6.926610_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745994_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_611_length_88277_cov_6.926610_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME277097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;s__F23-B02 sp003533405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_provirus.611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540623" accession="ERS11142589">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142589</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540623</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.1024</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.1854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1024_length_64347_cov_21.420134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1024_length_64347_cov_21.420134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745603_virus.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540624" accession="ERS11142590">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142590</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540624</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.1528</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.0364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>427</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1528_length_48523_cov_17.040395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1528_length_48523_cov_17.040395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0314473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540625" accession="ERS11142591">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142591</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540625</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_177_length_170383_cov_9.057426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5614035087719298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_177_length_170383_cov_9.057426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540626" accession="ERS11142592">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142592</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540626</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.1979</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>237.641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1979_length_40471_cov_134.520473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_1979_length_40471_cov_134.520473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__Phil1;s__Phil1 sp001940855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.1979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540627" accession="ERS11142593">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142593</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540627</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.222</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.5114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_222_length_148408_cov_12.203410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745994_bin.342</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_222_length_148408_cov_12.203410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540628" accession="ERS11142594">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142594</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540628</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.2570</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.69094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_2570_length_33111_cov_5.334716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_2570_length_33111_cov_5.334716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0351188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540629" accession="ERS11142595">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142595</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540629</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.3037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.27083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_3037_length_29338_cov_5.048529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_3037_length_29338_cov_5.048529</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.3037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540630" accession="ERS11142596">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142596</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540630</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.357</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540630</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.0424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_357_length_118783_cov_11.545297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7045454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_357_length_118783_cov_11.545297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540631" accession="ERS11142597">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142597</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540631</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.4146</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.7899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_4146_length_23366_cov_23.192494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_4146_length_23366_cov_23.192494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.4146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540632" accession="ERS11142598">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142598</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540632</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.4742</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.65118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_4742_length_21138_cov_3.512464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_4742_length_21138_cov_3.512464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738240_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__Firm-11;s__Firm-11 sp900548145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.4742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540633" accession="ERS11142599">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142599</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540633</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.5668</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.4424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_5668_length_18367_cov_19.775123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7745994_bin.310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_5668_length_18367_cov_19.775123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.5668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540634" accession="ERS11142600">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142600</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540634</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7745994_virus.782</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7745994.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.4612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7745994) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560700) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_782_length_76920_cov_17.621241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_E_1_1719__NODE_782_length_76920_cov_17.621241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540635" accession="ERS11142601">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142601</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540635</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_provirus.11</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.3729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_11_length_329520_cov_16.908904_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746009_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8809523809523809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_11_length_329520_cov_16.908904_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.1313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540636" accession="ERS11142602">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142602</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540636</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_provirus.38</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10192912905635668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>206.636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_38_length_213569_cov_104.105376_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746009_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8387096774193549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_38_length_213569_cov_104.105376_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001423</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus intestinalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0269416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540637" accession="ERS11142603">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142603</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540637</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_provirus.97</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540637</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.2295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_97_length_146092_cov_34.424977_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_97_length_146092_cov_34.424977_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium adolescentis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746009_provirus.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540638" accession="ERS11142604">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142604</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540638</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_virus.1523</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:21Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_1523_length_25311_cov_4.696838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746009_bin.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_1523_length_25311_cov_4.696838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Bilophila;s__Bilophila wadsworthia</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746009_virus.1523</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540639" accession="ERS11142605">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142605</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540639</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_virus.2588</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04408093884155772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:36Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_2588_length_16455_cov_20.858275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_2588_length_16455_cov_20.858275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0129098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540640" accession="ERS11142606">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142606</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540640</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_virus.52</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540640</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:22Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.7631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_52_length_183425_cov_20.139560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5365853658536586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_52_length_183425_cov_20.139560</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746009_virus.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540641" accession="ERS11142607">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142607</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540641</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_virus.755</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>4725.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_755_length_44265_cov_2412.581063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9245283018867924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_755_length_44265_cov_2412.581063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0315891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540642" accession="ERS11142608">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142608</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540642</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746009_virus.973</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746009.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10495594713656388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>291.991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746009) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4562110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_973_length_36116_cov_150.554399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6842105263157895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_C_5_1529__NODE_973_length_36116_cov_150.554399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0248554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540643" accession="ERS11142609">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142609</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540643</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_provirus.282</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.59289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_282_length_79647_cov_5.078861_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746012_bin.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_282_length_79647_cov_5.078861_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540644" accession="ERS11142610">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142610</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540644</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_provirus.78</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>102.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_78_length_137311_cov_101.451718_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_78_length_137311_cov_101.451718_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540645" accession="ERS11142611">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142611</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540645</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_virus.1239</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7824339207048457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_1239_length_33939_cov_6.626307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_1239_length_33939_cov_6.626307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745414_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900768625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1127389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540646" accession="ERS11142612">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142612</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540646</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_virus.1493</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01693336174904671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_1493_length_29711_cov_6.486569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_1493_length_29711_cov_6.486569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.1937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540647" accession="ERS11142613">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142613</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540647</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_virus.185</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.2587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_185_length_98060_cov_12.956054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.935064935064935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_185_length_98060_cov_12.956054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540648" accession="ERS11142614">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142614</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540648</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746012_virus.656</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746012.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746012) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559707) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_656_length_50722_cov_6.540014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_P_14_2348__NODE_656_length_50722_cov_6.540014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738238_provirus.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540649" accession="ERS11142615">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142615</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540649</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_provirus.1249</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.1388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1249_length_58664_cov_5.132009_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1249_length_58664_cov_5.132009_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746365_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540650" accession="ERS11142616">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142616</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540650</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_provirus.247</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.3608</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_247_length_155333_cov_14.099036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746030_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_247_length_155333_cov_14.099036_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.2332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540651" accession="ERS11142617">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142617</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540651</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_provirus.43</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>129.859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_43_length_328355_cov_71.989768_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746030_bin.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_43_length_328355_cov_71.989768_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.2611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540652" accession="ERS11142618">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142618</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540652</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_provirus.731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6649779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.5117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_731_length_84827_cov_59.373782_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8484848484848485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_731_length_84827_cov_59.373782_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_provirus.731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540653" accession="ERS11142619">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142619</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540653</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.1190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1424559471365639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1190_length_60869_cov_8.263982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1190_length_60869_cov_8.263982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_virus.1376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540654" accession="ERS11142620">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142620</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540654</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.1566</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540654</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1566_length_50911_cov_11.193650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1566_length_50911_cov_11.193650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746365_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.1566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540655" accession="ERS11142621">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142621</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540655</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.1909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540655</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1909_length_44092_cov_6.530819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_1909_length_44092_cov_6.530819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745613_bin.13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Marvinbryantia;s__Marvinbryantia sp014385005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.1909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540656" accession="ERS11142622">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142622</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540656</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.2081</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2081_length_41405_cov_6.281673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2081_length_41405_cov_6.281673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738571_bin.392</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__UBA644;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.2081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540657" accession="ERS11142623">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142623</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540657</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.2407</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.7393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2407_length_36906_cov_47.755519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746030_bin.295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2407_length_36906_cov_47.755519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745563_virus.395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540658" accession="ERS11142624">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142624</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540658</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.2796</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540658</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2796_length_32672_cov_8.821506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_2796_length_32672_cov_8.821506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738192_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Sphaerochaetales;f__Sphaerochaetaceae;g__UBA9732;s__UBA9732 sp001940825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738213_provirus.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540659" accession="ERS11142625">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142625</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540659</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.3828</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.3414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_3828_length_25300_cov_12.529913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_3828_length_25300_cov_12.529913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__NK3B98;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.3828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540660" accession="ERS11142626">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142626</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540660</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.5154</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_5154_length_19615_cov_7.715733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_5154_length_19615_cov_7.715733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.5154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540661" accession="ERS11142627">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142627</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540661</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.6811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_6811_length_15262_cov_11.906223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_6811_length_15262_cov_11.906223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.6811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540662" accession="ERS11142628">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142628</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540662</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746030_virus.950</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746030.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.9236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746030) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560629) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_950_length_71687_cov_7.237271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_18_1660__NODE_950_length_71687_cov_7.237271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540663" accession="ERS11142629">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142629</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540663</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_provirus.331</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.864977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>352.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_331_length_110322_cov_208.444673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746048_bin.132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_331_length_110322_cov_208.444673_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_provirus.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540664" accession="ERS11142630">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142630</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540664</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_provirus.677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.0204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_677_length_73421_cov_19.350172_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746048_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_677_length_73421_cov_19.350172_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540665" accession="ERS11142631">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142631</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540665</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.1035</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12932322959045642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1035_length_56934_cov_12.497265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7741935483870968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1035_length_56934_cov_12.497265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.1035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540666" accession="ERS11142632">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142632</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540666</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.1447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.4755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1447_length_46316_cov_20.388373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1447_length_46316_cov_20.388373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_E</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.1447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540667" accession="ERS11142633">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142633</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540667</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.1720</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.7978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1720_length_41665_cov_25.798476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_1720_length_41665_cov_25.798476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.1720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540668" accession="ERS11142634">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142634</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540668</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540668</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_202_length_136061_cov_6.716967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.96875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_202_length_136061_cov_6.716967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540669" accession="ERS11142635">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142635</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540669</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.235</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_235_length_128125_cov_10.531504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6964285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_235_length_128125_cov_10.531504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540670" accession="ERS11142636">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142636</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540670</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>44.7643</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_353_length_106551_cov_26.349278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_353_length_106551_cov_26.349278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540671" accession="ERS11142637">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142637</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540671</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.4363</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.21435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_4363_length_21993_cov_3.884787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_4363_length_21993_cov_3.884787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738595_provirus.1146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540672" accession="ERS11142638">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142638</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540672</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.5954</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04588790558384566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.4306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_5954_length_17379_cov_21.990232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_5954_length_17379_cov_21.990232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745890_bin.370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__QAMH01;g__W2P13-069;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.5954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540673" accession="ERS11142639">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142639</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540673</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746048_virus.7743</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746048.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>251.678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746048) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561940) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_7743_length_14069_cov_145.278241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_G_7_1734__NODE_7743_length_14069_cov_145.278241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745816_bin.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_virus.7743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540674" accession="ERS11142640">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142640</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540674</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_provirus.1127</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Phifelvirus provirus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.7682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1127_length_48527_cov_18.198452_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1127_length_48527_cov_18.198452_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_provirus.1127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Phifelvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540675" accession="ERS11142641">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142641</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540675</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_provirus.259</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.0258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_259_length_98075_cov_11.945693_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746051_bin.186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_259_length_98075_cov_11.945693_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_provirus.259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540676" accession="ERS11142642">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142642</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540676</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_provirus.632</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.0186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_632_length_64927_cov_16.524827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_632_length_64927_cov_16.524827_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540677" accession="ERS11142643">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142643</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540677</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_provirus.845</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_845_length_57126_cov_10.954916_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_845_length_57126_cov_10.954916_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger formicilis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746048_provirus.595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540678" accession="ERS11142644">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142644</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540678</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.1179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1179_length_47208_cov_15.268104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6216216216216216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1179_length_47208_cov_15.268104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540679" accession="ERS11142645">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142645</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540679</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.1276</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>348.509</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1276_length_45080_cov_193.515166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4772727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1276_length_45080_cov_193.515166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540680" accession="ERS11142646">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142646</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540680</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.1432</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Clostridium phage phi8074-B1 virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540680</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.5135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1432_length_42388_cov_13.459550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1432_length_42388_cov_13.459550</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746217_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Romboutsia;s__Romboutsia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Clostridium phage phi8074-B1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540681" accession="ERS11142647">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142647</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540681</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.158</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20800152472944827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540681</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.9132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_158_length_119469_cov_10.889356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_158_length_119469_cov_10.889356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540682" accession="ERS11142648">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142648</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540682</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.1805</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1805_length_37242_cov_7.219508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1805_length_37242_cov_7.219508</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__CAG-448;s__CAG-448 sp003150135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540683" accession="ERS11142649">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142649</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540683</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.1923</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0490368859781216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.63135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1923_length_35937_cov_4.678277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_1923_length_35937_cov_4.678277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME283914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.1923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540684" accession="ERS11142650">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142650</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540684</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.213</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540684</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_213_length_105643_cov_19.626850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7560975609756098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_213_length_105643_cov_19.626850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540685" accession="ERS11142651">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142651</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540685</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.9004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_233_length_103068_cov_6.644318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7073170731707317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_233_length_103068_cov_6.644318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME101580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Enterocloster;s__Enterocloster sp000431375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540686" accession="ERS11142652">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142652</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540686</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.262</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17747797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.9254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_262_length_97696_cov_18.487292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9659090909090908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_262_length_97696_cov_18.487292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540687" accession="ERS11142653">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142653</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540687</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.3013</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07725770925110134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>685.836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_3013_length_26772_cov_420.191834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_3013_length_26772_cov_420.191834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738661_virus.4060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540688" accession="ERS11142654">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142654</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540688</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.34</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_34_length_203845_cov_8.262097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_34_length_203845_cov_8.262097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540689" accession="ERS11142655">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142655</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540689</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.4134</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.556855397479522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_4134_length_21797_cov_6.999448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_4134_length_21797_cov_6.999448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0196334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540690" accession="ERS11142656">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142656</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540690</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.4927</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Gregsiragusavirus virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540690</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_4927_length_19328_cov_10.788323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_4927_length_19328_cov_10.788323</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.4927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Guelinviridae; Denniswatsonvirinae; Gregsiragusavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540691" accession="ERS11142657">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142657</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540691</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.5654</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>47.7335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_5654_length_17514_cov_29.006251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746051_bin.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_5654_length_17514_cov_29.006251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540692" accession="ERS11142658">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142658</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540692</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_745_length_60528_cov_10.663728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6585365853658537</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_745_length_60528_cov_10.663728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738621_virus.263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540693" accession="ERS11142659">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142659</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540693</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.813</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.8302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_813_length_58405_cov_50.065543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_813_length_58405_cov_50.065543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540694" accession="ERS11142660">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142660</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540694</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746051_virus.996</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746051.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746051) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560669) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_996_length_52263_cov_8.147568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7241379310344828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_21_1706__NODE_996_length_52263_cov_8.147568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME283914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900066565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540695" accession="ERS11142661">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142661</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540695</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_provirus.1411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>351.161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1411_length_63916_cov_196.040352_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1411_length_63916_cov_196.040352_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_provirus.854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540696" accession="ERS11142662">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142662</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540696</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_provirus.2245</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.1935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2245_length_45695_cov_27.760796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2245_length_45695_cov_27.760796_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0342275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540697" accession="ERS11142663">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142663</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540697</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_provirus.381</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540697</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_381_length_134143_cov_8.832306_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_381_length_134143_cov_8.832306_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746738_bin.136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900554275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540698" accession="ERS11142664">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142664</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540698</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_provirus.59</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.4116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_59_length_304942_cov_17.682371_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746061_bin.217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_59_length_304942_cov_17.682371_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540699" accession="ERS11142665">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142665</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540699</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_provirus.917</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.6676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_917_length_83399_cov_10.625597_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746061_bin.313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_917_length_83399_cov_10.625597_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_provirus.917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540700" accession="ERS11142666">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142666</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540700</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.1254</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>497.388</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1254_length_68759_cov_272.913806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1254_length_68759_cov_272.913806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.1254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540701" accession="ERS11142667">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142667</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540701</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.1617</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.1854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1617_length_58504_cov_12.835333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.32142857142857145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1617_length_58504_cov_12.835333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540702" accession="ERS11142668">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142668</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540702</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.1893</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1893_length_52273_cov_7.539562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_1893_length_52273_cov_7.539562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME122359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.1893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540703" accession="ERS11142669">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142669</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540703</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.2221</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>136.612</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2221_length_46078_cov_74.803787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6486486486486487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2221_length_46078_cov_74.803787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.2221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540704" accession="ERS11142670">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142670</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540704</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.235</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540704</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>100.896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_235_length_170493_cov_57.235089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_235_length_170493_cov_57.235089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540705" accession="ERS11142671">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142671</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540705</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.2623</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.1729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2623_length_40533_cov_13.144577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.38461538461538464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2623_length_40533_cov_13.144577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.2623</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540706" accession="ERS11142672">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142672</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540706</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.2741</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2741_length_39362_cov_13.460099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_2741_length_39362_cov_13.460099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_virus.650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540707" accession="ERS11142673">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142673</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540707</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.3099</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.96144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_3099_length_35655_cov_4.244533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_3099_length_35655_cov_4.244533</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger variabilis_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738609_provirus.442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540708" accession="ERS11142674">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142674</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540708</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.327</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_327_length_141992_cov_11.995483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_327_length_141992_cov_11.995483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745378_virus.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540709" accession="ERS11142675">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142675</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540709</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.366</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540709</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_366_length_136311_cov_9.542273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5974025974025974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_366_length_136311_cov_9.542273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540710" accession="ERS11142676">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142676</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540710</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.3877</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>126.516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_3877_length_29956_cov_71.077981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_3877_length_29956_cov_71.077981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738261_virus.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540711" accession="ERS11142677">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142677</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540711</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.4423</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07068695990175425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_4423_length_26948_cov_7.859737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_4423_length_26948_cov_7.859737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540712" accession="ERS11142678">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142678</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540712</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.4920</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00135789431064384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.10735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_4920_length_24669_cov_4.221861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_4920_length_24669_cov_4.221861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746061_virus.4920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540713" accession="ERS11142679">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142679</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540713</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.6145</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.939977973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.2915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_6145_length_20503_cov_25.678106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_6145_length_20503_cov_25.678106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745975_virus.2469</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540714" accession="ERS11142680">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142680</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540714</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746061_virus.7311</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746061.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746061) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560646) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_7311_length_17753_cov_6.751358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_J_22_1662__NODE_7311_length_17753_cov_6.751358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.4689</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540715" accession="ERS11142681">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142681</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540715</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_provirus.1156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.9669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_1156_length_68310_cov_19.176879_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746094_bin.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_1156_length_68310_cov_19.176879_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME169940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp003487665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745648_provirus.969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540716" accession="ERS11142682">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142682</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540716</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_provirus.2147</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11433820478765556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540716</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.3397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2147_length_45059_cov_16.263216_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746094_bin.174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2147_length_45059_cov_16.263216_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_provirus.1261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540717" accession="ERS11142683">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142683</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540717</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_provirus.3762</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3762_length_28818_cov_9.760690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3762_length_28818_cov_9.760690_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738535_provirus.311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540718" accession="ERS11142684">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142684</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540718</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_provirus.922</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15590814195772124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.1274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_922_length_78267_cov_37.673360_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746094_bin.148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_922_length_78267_cov_37.673360_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_provirus.503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540719" accession="ERS11142685">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142685</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540719</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.150</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>429.784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_150_length_197298_cov_237.874020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5121951219512195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_150_length_197298_cov_237.874020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA4372;s__UBA4372 sp900543815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_virus.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540720" accession="ERS11142686">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142686</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540720</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.1933</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>203.083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_1933_length_48523_cov_116.085621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_1933_length_48523_cov_116.085621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.1933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540721" accession="ERS11142687">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142687</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540721</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.2309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540721</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:23Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2309_length_42949_cov_19.931960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2309_length_42949_cov_19.931960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_virus.746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540722" accession="ERS11142688">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142688</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540722</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.2536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15927917672702033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.74003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2536_length_39682_cov_5.465547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2536_length_39682_cov_5.465547</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540723" accession="ERS11142689">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142689</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540723</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.2809</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.0626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2809_length_36642_cov_19.315876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_2809_length_36642_cov_19.315876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738616_provirus.420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540724" accession="ERS11142690">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142690</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540724</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.3172</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3172_length_33331_cov_8.847477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3172_length_33331_cov_8.847477</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.3172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540725" accession="ERS11142691">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142691</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540725</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.3761</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.28059885417488195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.4946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3761_length_28819_cov_35.575569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_3761_length_28819_cov_35.575569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.2411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540726" accession="ERS11142692">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142692</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540726</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.4314</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540726</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_4314_length_25534_cov_10.677142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_4314_length_25534_cov_10.677142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540727" accession="ERS11142693">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142693</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540727</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.4949</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_4949_length_22644_cov_7.443967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7222222222222222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_4949_length_22644_cov_7.443967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745346_bin.170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900768995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.4949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540728" accession="ERS11142694">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142694</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540728</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.5674</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01358752575482346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.4279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_5674_length_19976_cov_24.244183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_5674_length_19976_cov_24.244183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745989_bin.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UMGS2068;s__UMGS2068 sp900769635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.2492</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540729" accession="ERS11142695">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142695</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540729</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.7024</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Beecentumtrevirus virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_7024_length_16361_cov_29.462601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_7024_length_16361_cov_29.462601</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738569_virus.5962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae; Picovirinae; Beecentumtrevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540730" accession="ERS11142696">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142696</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540730</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746094_virus.9819</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salmonella phage assan virus assembled from ERR7746094.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14709484992950092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746094) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561846) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_9819_length_12000_cov_11.051172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_G_14_1102__NODE_9819_length_12000_cov_11.051172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_virus.6216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus; Salmonella phage assan</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540731" accession="ERS11142697">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142697</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540731</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_provirus.176</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.61888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_176_length_128538_cov_5.181767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746101_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_176_length_128538_cov_5.181767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-533;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540732" accession="ERS11142698">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142698</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540732</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_provirus.881</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540732</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.5694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_881_length_53354_cov_44.005837_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_881_length_53354_cov_44.005837_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_provirus.881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540733" accession="ERS11142699">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142699</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540733</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.1326</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08814699712046228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.80322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1326_length_40865_cov_5.199544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1326_length_40865_cov_5.199544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.2296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540734" accession="ERS11142700">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142700</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540734</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.1527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1527_length_37075_cov_13.754176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1527_length_37075_cov_13.754176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS874;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540735" accession="ERS11142701">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142701</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540735</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.1789</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04117563148169571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>396</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1789_length_33064_cov_11.694789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_1789_length_33064_cov_11.694789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540736" accession="ERS11142702">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142702</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540736</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.214</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_214_length_118569_cov_11.127460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.660377358490566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_214_length_118569_cov_11.127460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738174_bin.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1994;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540737" accession="ERS11142703">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142703</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540737</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr130_1 virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>182.594</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_293_length_97742_cov_102.959863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7628865979381443</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_293_length_97742_cov_102.959863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr130_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540738" accession="ERS11142704">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142704</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540738</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.46</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Takahashivirus virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_46_length_227439_cov_12.970888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_46_length_227439_cov_12.970888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Takahashivirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540739" accession="ERS11142705">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142705</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540739</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.66</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540739</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>111.532</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_66_length_197036_cov_62.405460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_66_length_197036_cov_62.405460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747083_bin.116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__UMGS1994;s__UMGS1994 sp900556975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540740" accession="ERS11142706">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142706</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540740</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746101_virus.822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746101.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540740</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.3541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746101) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561613) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_822_length_55956_cov_14.714043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585590</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_22_1166__NODE_822_length_55956_cov_14.714043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540741" accession="ERS11142707">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142707</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540741</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_provirus.1230</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540741</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.9715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1230_length_48969_cov_39.512476_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1230_length_48969_cov_39.512476_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_virus.1002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540742" accession="ERS11142708">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142708</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540742</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_provirus.1825</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1825_length_37202_cov_6.690559_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1825_length_37202_cov_6.690559_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_provirus.1825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540743" accession="ERS11142709">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142709</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540743</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_provirus.394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.46376</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_394_length_95096_cov_5.391269_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_394_length_95096_cov_5.391269_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_provirus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540744" accession="ERS11142710">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142710</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540744</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_provirus.800</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vibrio phage douglas 12A4 provirus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25495594713656383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.7255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_800_length_63883_cov_27.545137_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_800_length_63883_cov_27.545137_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738199_virus.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Vibrio phage douglas 12A4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540745" accession="ERS11142711">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142711</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540745</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_virus.1274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2557089863334722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.12866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1274_length_47872_cov_5.520410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746111_bin.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_1274_length_47872_cov_5.520410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.1274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540746" accession="ERS11142712">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142712</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540746</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_virus.2100</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.0521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_2100_length_33539_cov_16.624440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_2100_length_33539_cov_16.624440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540747" accession="ERS11142713">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142713</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540747</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_virus.2843</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540747</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.2399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_2843_length_26430_cov_48.693014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_2843_length_26430_cov_48.693014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738144_bin.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900767615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745867_virus.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540748" accession="ERS11142714">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142714</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540748</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_virus.353</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.1265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_353_length_102335_cov_34.197921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_353_length_102335_cov_34.197921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7737964_bin.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;s__Phascolarctobacterium_A sp900551335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738617_virus.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540749" accession="ERS11142715">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142715</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540749</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746111_virus.832</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746111.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04377392670077858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.5432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746111) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559340) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_832_length_62446_cov_33.851160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9428571428571428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_4_2616__NODE_832_length_62446_cov_33.851160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540750" accession="ERS11142716">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142716</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540750</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_provirus.153</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_153_length_191232_cov_8.216332_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746113_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_153_length_191232_cov_8.216332_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_provirus.153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540751" accession="ERS11142717">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142717</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540751</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_provirus.2376</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>28.5951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2376_length_38996_cov_16.227729_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2376_length_38996_cov_16.227729_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME023798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900543155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_virus.754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540752" accession="ERS11142718">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142718</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540752</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_provirus.440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.5451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_440_length_117293_cov_17.742484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_440_length_117293_cov_17.742484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738223_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__V9D3004;s__V9D3004 sp002349525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_virus.1366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540753" accession="ERS11142719">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142719</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540753</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_provirus.805</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540753</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.4467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_805_length_84123_cov_14.689967_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_805_length_84123_cov_14.689967_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738243_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900551495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540754" accession="ERS11142720">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142720</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540754</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.1201</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1201_length_64560_cov_11.231332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1201_length_64560_cov_11.231332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738593_virus.686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540755" accession="ERS11142721">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142721</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540755</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.1290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>257.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1290_length_61177_cov_144.506252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1290_length_61177_cov_144.506252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747426_bin.316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D succinifaciens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.1290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540756" accession="ERS11142722">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142722</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540756</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.1716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.9287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1716_length_49416_cov_27.510022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1716_length_49416_cov_27.510022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746285_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFTH01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540757" accession="ERS11142723">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142723</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540757</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.1988</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.0786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1988_length_44517_cov_9.374190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6470588235294118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_1988_length_44517_cov_9.374190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_provirus.500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540758" accession="ERS11142724">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142724</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540758</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.2090</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540758</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>109.939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2090_length_42872_cov_62.675757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9032258064516128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2090_length_42872_cov_62.675757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738598_virus.929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540759" accession="ERS11142725">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142725</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540759</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.2294</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2294_length_39984_cov_8.326284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2294_length_39984_cov_8.326284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.2294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540760" accession="ERS11142726">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142726</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540760</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.2399</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2399_length_38703_cov_8.380650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.89</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2399_length_38703_cov_8.380650</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540761" accession="ERS11142727">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142727</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540761</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.2548</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2548_length_36789_cov_6.319351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_2548_length_36789_cov_6.319351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738611_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__Firm-11;s__Firm-11 sp004556545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.2548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540762" accession="ERS11142728">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142728</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540762</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.275</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>7.8965839962564e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.7192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_275_length_147450_cov_49.863197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.639344262295082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_275_length_147450_cov_49.863197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540763" accession="ERS11142729">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142729</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540763</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.3014</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>138.305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_3014_length_32212_cov_75.835631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_3014_length_32212_cov_75.835631</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.3014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540764" accession="ERS11142730">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142730</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540764</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.3440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06734581497797357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540764</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.3564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_3440_length_28841_cov_23.861702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_3440_length_28841_cov_23.861702</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME246707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__Phil1;s__Phil1 sp001940855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.3440</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540765" accession="ERS11142731">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142731</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540765</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.4521</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04215678870616059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.65334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_4521_length_22832_cov_5.178994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_4521_length_22832_cov_5.178994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS902;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_provirus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540766" accession="ERS11142732">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142732</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540766</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.504</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_504_length_109923_cov_7.057826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_504_length_109923_cov_7.057826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746113_virus.504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540767" accession="ERS11142733">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142733</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540767</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.5659</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Likavirus virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540767</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_5659_length_18505_cov_8.328468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746113_bin.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_5659_length_18505_cov_8.328468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0240719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arquatrovirinae; Likavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540768" accession="ERS11142734">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142734</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540768</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746113_virus.709</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746113.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.7169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746113) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561888) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_709_length_91040_cov_10.555160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5348837209302325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_6_1152__NODE_709_length_91040_cov_10.555160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738584_virus.279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540769" accession="ERS11142735">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142735</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540769</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_provirus.169</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.7416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_169_length_87237_cov_17.592634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746144_bin.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_169_length_87237_cov_17.592634_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_provirus.169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540770" accession="ERS11142736">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142736</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540770</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_provirus.496</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20743392070484584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.1313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_496_length_50526_cov_15.808658_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746144_bin.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_496_length_50526_cov_15.808658_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540771" accession="ERS11142737">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142737</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540771</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.1250</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1234392592042222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.77208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_1250_length_28656_cov_4.734070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_1250_length_28656_cov_4.734070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745823_bin.247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp900539115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540772" accession="ERS11142738">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142738</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540772</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.2268</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1479</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_2268_length_19371_cov_10.231678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_2268_length_19371_cov_10.231678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.2268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540773" accession="ERS11142739">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142739</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540773</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540773</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.5429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_397_length_57809_cov_17.252962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_397_length_57809_cov_17.252962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540774" accession="ERS11142740">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142740</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540774</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_563_length_46566_cov_7.833574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_563_length_46566_cov_7.833574</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_virus.693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540775" accession="ERS11142741">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142741</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540775</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.672</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1432089863334723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.6871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_672_length_42249_cov_9.058333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_672_length_42249_cov_9.058333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745438_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540776" accession="ERS11142742">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142742</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540776</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746144_virus.91</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746144.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.7935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746144) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560517) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_91_length_109951_cov_37.629703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6944444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_12_1914__NODE_91_length_109951_cov_37.629703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Agathobaculum;s__Agathobaculum butyriciproducens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540777" accession="ERS11142743">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142743</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540777</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_provirus.1397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1397_length_46813_cov_5.958041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1397_length_46813_cov_5.958041_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745890_bin.370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__QAMH01;g__W2P13-069;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_provirus.1397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540778" accession="ERS11142744">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142744</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540778</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_provirus.257</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>144.868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_257_length_110347_cov_88.110248_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746184_bin.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_257_length_110347_cov_88.110248_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540779" accession="ERS11142745">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142745</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540779</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_provirus.482</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>203.696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_482_length_81263_cov_121.161875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746184_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_482_length_81263_cov_121.161875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_provirus.482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540780" accession="ERS11142746">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142746</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540780</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.1006</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1006_length_55801_cov_12.612142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1006_length_55801_cov_12.612142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738607_virus.1165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540781" accession="ERS11142747">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142747</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540781</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.1204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1371696035242291</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.82224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1204_length_50825_cov_5.804189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1204_length_50825_cov_5.804189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0367890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540782" accession="ERS11142748">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142748</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540782</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.1707</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1707_length_41316_cov_9.230219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1707_length_41316_cov_9.230219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540783" accession="ERS11142749">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142749</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540783</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.1964</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.7546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1964_length_37935_cov_10.728644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_1964_length_37935_cov_10.728644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.1964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540784" accession="ERS11142750">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142750</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540784</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.2211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540784</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_2211_length_34760_cov_7.189315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_2211_length_34760_cov_7.189315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_virus.1556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540785" accession="ERS11142751">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142751</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540785</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.2707</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.23238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_2707_length_30243_cov_5.277829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_2707_length_30243_cov_5.277829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540786" accession="ERS11142752">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142752</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540786</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.3305</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2432438154783468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.71987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_3305_length_26395_cov_5.792233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_3305_length_26395_cov_5.792233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738165_virus.1127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540787" accession="ERS11142753">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142753</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540787</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.4088</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.56069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_4088_length_22715_cov_4.835454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_4088_length_22715_cov_4.835454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.4088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540788" accession="ERS11142754">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142754</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540788</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746184_virus.7773</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7746184.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.41965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746184) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561544) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_7773_length_13697_cov_4.215051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_5_1522__NODE_7773_length_13697_cov_4.215051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.4256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540789" accession="ERS11142755">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142755</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540789</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746185_virus.213</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746185.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.5069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746185) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560394) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_10_1834__NODE_213_length_23153_cov_32.876885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_10_1834__NODE_213_length_23153_cov_32.876885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;s__Bifidobacterium infantis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745561_provirus.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540790" accession="ERS11142756">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142756</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540790</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_provirus.1507</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540790</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1507_length_56976_cov_56.146294_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8095238095238095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1507_length_56976_cov_56.146294_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540791" accession="ERS11142757">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142757</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540791</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_provirus.2063</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08836726143764287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2063_length_46715_cov_5.782774_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2063_length_46715_cov_5.782774_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_provirus.2063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540792" accession="ERS11142758">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142758</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540792</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_provirus.432</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Chimpanzee faeces associated microphage 2 provirus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.4781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_432_length_109477_cov_29.443026_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_432_length_109477_cov_29.443026_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_provirus.432</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Chimpanzee faeces associated microphage 2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540793" accession="ERS11142759">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142759</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540793</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_provirus.634</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540793</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.298400000000001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_634_length_93599_cov_7.790434_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_634_length_93599_cov_7.790434_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_provirus.634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540794" accession="ERS11142760">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142760</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540794</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.1038</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.8556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1038_length_72460_cov_43.937720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1038_length_72460_cov_43.937720</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__RUG472;s__RUG472 sp900545265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.1153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540795" accession="ERS11142761">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142761</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540795</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.1346</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>383.113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1346_length_61387_cov_227.849666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1346_length_61387_cov_227.849666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>scientific_name</TAG>
        <VALUE>uncultured Caudovirales phage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746436_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540796" accession="ERS11142762">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142762</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540796</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.1780</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4772577092511012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540796</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1780_length_51287_cov_6.822554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1780_length_51287_cov_6.822554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.1780</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540797" accession="ERS11142763">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142763</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540797</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.1986</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.3824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1986_length_47802_cov_32.893263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.380952380952381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_1986_length_47802_cov_32.893263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.1986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540798" accession="ERS11142764">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142764</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540798</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.2215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>80.9102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2215_length_44442_cov_48.918517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2215_length_44442_cov_48.918517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540799" accession="ERS11142765">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142765</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540799</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.2467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2467_length_41267_cov_7.370187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_2467_length_41267_cov_7.370187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0288227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540800" accession="ERS11142766">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142766</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540800</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_278_length_129195_cov_6.526216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_278_length_129195_cov_6.526216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540801" accession="ERS11142767">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142767</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540801</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.3193</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7069113353173286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_3193_length_34229_cov_19.211671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_3193_length_34229_cov_19.211671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540802" accession="ERS11142768">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142768</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540802</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.3710</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_3710_length_30369_cov_12.962168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_3710_length_30369_cov_12.962168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745989_bin.108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__RUG806;s__RUG806 sp900313475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_provirus.420</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540803" accession="ERS11142769">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142769</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540803</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.4829</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04719162995594713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.3573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_4829_length_24379_cov_5.450086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_4829_length_24379_cov_5.450086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.4829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540804" accession="ERS11142770">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142770</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540804</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.5580</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540804</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_5580_length_21636_cov_6.123197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746217_bin.339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_5580_length_21636_cov_6.123197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.5580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540805" accession="ERS11142771">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142771</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540805</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.666</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr271_1 virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10247797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_666_length_92099_cov_18.248441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8387096774193549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_666_length_92099_cov_18.248441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr271_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540806" accession="ERS11142772">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142772</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540806</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746217_virus.794</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746217.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746217) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567680) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_794_length_83447_cov_6.821674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5853658536585366</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_12_2317__NODE_794_length_83447_cov_6.821674</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME247421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__UBA11524;s__UBA11524 sp000437595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540807" accession="ERS11142773">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142773</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540807</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_provirus.120</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>267.238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_120_length_175472_cov_278.517825_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_120_length_175472_cov_278.517825_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_provirus.120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540808" accession="ERS11142774">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142774</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540808</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_provirus.289</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.7742</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_289_length_126621_cov_12.826132_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_289_length_126621_cov_12.826132_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_provirus.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540809" accession="ERS11142775">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142775</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540809</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_provirus.787</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.0062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_787_length_77329_cov_31.531002_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_787_length_77329_cov_31.531002_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746051_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540811" accession="ERS11142777">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142777</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540811</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.1633</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06486784140969162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.7114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_1633_length_49310_cov_35.523226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_1633_length_49310_cov_35.523226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.1633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540812" accession="ERS11142778">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142778</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540812</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.2118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540812</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_2118_length_40191_cov_18.940395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_2118_length_40191_cov_18.940395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738194_provirus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540813" accession="ERS11142779">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142779</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540813</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.2473</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14523598014784908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.3365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_2473_length_35642_cov_5.653255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_2473_length_35642_cov_5.653255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.2473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540814" accession="ERS11142780">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142780</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540814</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.3440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.3513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_3440_length_26468_cov_8.205184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_3440_length_26468_cov_8.205184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540815" accession="ERS11142781">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142781</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540815</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.5144</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.6703</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_5144_length_18083_cov_8.311230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_5144_length_18083_cov_8.311230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738660_bin.360</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.4652</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540816" accession="ERS11142782">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142782</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540816</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746233_provirus.133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.6891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_133_length_80603_cov_12.338797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746233_bin.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_133_length_80603_cov_12.338797_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0329648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540817" accession="ERS11142783">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142783</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540817</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746233_provirus.45</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_45_length_149483_cov_11.844149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5172413793103449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_45_length_149483_cov_11.844149_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540818" accession="ERS11142784">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142784</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540818</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746233_virus.225</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.34245594713656385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_225_length_55639_cov_6.982128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746233_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_225_length_55639_cov_6.982128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540819" accession="ERS11142785">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142785</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540819</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746233_virus.607</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746233.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746233) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560872) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_607_length_27646_cov_7.365483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_D_5_1545__NODE_607_length_27646_cov_7.365483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_virus.694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540820" accession="ERS11142786">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142786</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540820</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_provirus.1334</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1334_length_44749_cov_7.307799_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1334_length_44749_cov_7.307799_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.1334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540821" accession="ERS11142787">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142787</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540821</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_provirus.211</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.37747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_211_length_132671_cov_6.333869_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746285_bin.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_211_length_132671_cov_6.333869_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738152_provirus.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540822" accession="ERS11142788">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142788</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540822</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_provirus.526</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.3722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_526_length_80704_cov_7.464460_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_526_length_80704_cov_7.464460_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540823" accession="ERS11142789">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142789</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540823</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_provirus.822</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_822_length_60457_cov_6.801722_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746285_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_822_length_60457_cov_6.801722_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540824" accession="ERS11142790">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142790</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540824</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.1261</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Pbunavirus virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>297.355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1261_length_46259_cov_180.399355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5294117647058824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1261_length_46259_cov_180.399355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.1261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Pbunavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540825" accession="ERS11142791">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142791</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540825</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.1415</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>136.657</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1415_length_43179_cov_82.053060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.35294117647058826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1415_length_43179_cov_82.053060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME254918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFMI01;s__SFMI01 sp004556155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745919_virus.889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540826" accession="ERS11142792">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142792</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540826</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.1631</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.4219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1631_length_39187_cov_6.731629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746285_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1631_length_39187_cov_6.731629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738576_virus.1308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540827" accession="ERS11142793">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142793</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540827</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.1791</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.8372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1791_length_36659_cov_40.005303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_1791_length_36659_cov_40.005303</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.1791</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540828" accession="ERS11142794">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142794</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540828</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.2188</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.9946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_2188_length_32271_cov_28.322762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_2188_length_32271_cov_28.322762</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738576_bin.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Atopobiaceae;g__Olsenella_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540829" accession="ERS11142795">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142795</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540829</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.2829</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540829</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.47136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_2829_length_27036_cov_4.370155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_2829_length_27036_cov_4.370155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.1663</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540830" accession="ERS11142796">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142796</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540830</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.3856</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.9355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_3856_length_21970_cov_8.391860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9411764705882352</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_3856_length_21970_cov_8.391860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Slackia_A;s__Slackia_A isoflavoniconvertens</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.3856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540831" accession="ERS11142797">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142797</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540831</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.558</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.1348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_558_length_78398_cov_7.334776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7027027027027027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_558_length_78398_cov_7.334776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.558</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540832" accession="ERS11142798">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142798</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540832</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746285_virus.811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746285.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.5361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746285) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567854) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_811_length_60826_cov_36.278671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_12_1545__NODE_811_length_60826_cov_36.278671</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_virus.811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540833" accession="ERS11142799">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142799</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540833</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_provirus.1218</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540833</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>97.2864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1218_length_54656_cov_63.144415_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1218_length_54656_cov_63.144415_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_virus.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540834" accession="ERS11142800">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142800</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540834</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_provirus.170</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>93.4978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_170_length_152660_cov_50.486955_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746321_bin.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_170_length_152660_cov_50.486955_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp001916075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_provirus.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540835" accession="ERS11142801">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142801</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540835</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_provirus.268</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>54.8982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_268_length_123485_cov_31.020177_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9107142857142856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_268_length_123485_cov_31.020177_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_provirus.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540836" accession="ERS11142802">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142802</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540836</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_provirus.428</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.2541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_428_length_98461_cov_18.262248_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746321_bin.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_428_length_98461_cov_18.262248_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_provirus.322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540837" accession="ERS11142803">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142803</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540837</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_provirus.745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.64564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_745_length_73408_cov_5.027642_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8148148148148148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_745_length_73408_cov_5.027642_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540838" accession="ERS11142804">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142804</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540838</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.1082</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.8381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1082_length_58649_cov_15.653555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1082_length_58649_cov_15.653555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745384_bin.138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900553155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.1155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540839" accession="ERS11142805">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142805</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540839</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.1631</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540839</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.2279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1631_length_45844_cov_12.365482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1631_length_45844_cov_12.365482</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.1553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540840" accession="ERS11142806">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142806</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540840</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.1842</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>127.803</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1842_length_42666_cov_79.857404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_1842_length_42666_cov_79.857404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738200_virus.670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540841" accession="ERS11142807">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142807</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540841</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.2281</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540841</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.6014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_2281_length_36917_cov_24.069897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9772727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_2281_length_36917_cov_24.069897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738530_bin.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium sp900539945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.2281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540842" accession="ERS11142808">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142808</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540842</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.2793</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4635</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_2793_length_32273_cov_7.776991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_2793_length_32273_cov_7.776991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540843" accession="ERS11142809">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142809</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540843</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.3603</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.92584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_3603_length_27012_cov_3.650009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_3603_length_27012_cov_3.650009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738623_bin.166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Slackia_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540844" accession="ERS11142810">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142810</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540844</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.535</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540844</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.3217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_535_length_86813_cov_34.356473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_535_length_86813_cov_34.356473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746321_virus.535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540845" accession="ERS11142811">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142811</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540845</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746321_virus.967</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746321.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>379.346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746321) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567810) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_967_length_62528_cov_213.104738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8260869565217391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_16_1650__NODE_967_length_62528_cov_213.104738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540846" accession="ERS11142812">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142812</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540846</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_provirus.189</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.1156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_189_length_131458_cov_37.638563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746344_bin.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_189_length_131458_cov_37.638563_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-605;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_provirus.629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540847" accession="ERS11142813">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142813</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540847</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_provirus.436</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540847</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.6642</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_436_length_93910_cov_29.288694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746344_bin.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_436_length_93910_cov_29.288694_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745948_virus.945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540848" accession="ERS11142814">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142814</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540848</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_provirus.724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis provirus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.8319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_724_length_71810_cov_12.325527_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746344_bin.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_724_length_71810_cov_12.325527_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745922_virus.337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540849" accession="ERS11142815">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142815</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540849</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.1129</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.2997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_1129_length_55459_cov_4.792568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_1129_length_55459_cov_4.792568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.1129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540850" accession="ERS11142816">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142816</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540850</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.1648</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_1648_length_42303_cov_21.992232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_1648_length_42303_cov_21.992232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540851" accession="ERS11142817">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142817</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540851</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.2100</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19860955218654155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540851</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.53551</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_2100_length_35376_cov_4.359104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_2100_length_35376_cov_4.359104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738662_virus.611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540852" accession="ERS11142818">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142818</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540852</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.2794</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540852</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.9822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_2794_length_27906_cov_5.174710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746344_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_2794_length_27906_cov_5.174710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738209_bin.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-822;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746344_virus.2794</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540853" accession="ERS11142819">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142819</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540853</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>309.846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_359_length_103237_cov_182.623711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9178082191780822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_359_length_103237_cov_182.623711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0368853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540854" accession="ERS11142820">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142820</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540854</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.430</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr130_1 virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>813.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_430_length_94704_cov_475.891162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7454545454545455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_430_length_94704_cov_475.891162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME268072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella stercorea</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738566_virus.370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr130_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540855" accession="ERS11142821">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142821</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540855</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.6329</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01238986784140969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:32Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.2894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_6329_length_13011_cov_35.297754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_6329_length_13011_cov_35.297754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__CAG-110 sp900544405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_virus.4192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540856" accession="ERS11142822">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142822</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540856</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746344_virus.909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746344.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>114.307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746344) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559424) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_909_length_63167_cov_67.290236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_O_20_2642__NODE_909_length_63167_cov_67.290236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745384_virus.591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540857" accession="ERS11142823">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142823</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540857</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_provirus.1117</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17431493571559326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1117_length_54467_cov_6.906251_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746345_bin.220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1117_length_54467_cov_6.906251_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540858" accession="ERS11142824">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142824</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540858</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_provirus.20</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06396789356545554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540858</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.5649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_20_length_329360_cov_9.616403_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_20_length_329360_cov_9.616403_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus_C;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.1312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540859" accession="ERS11142825">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142825</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540859</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_provirus.4052</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. provirus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540859</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.9243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_4052_length_21988_cov_34.433116_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746345_bin.235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_4052_length_21988_cov_34.433116_1_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_provirus.13346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540860" accession="ERS11142826">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142826</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540860</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_provirus.852</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540860</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_852_length_63823_cov_10.906347_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746345_bin.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8064516129032258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_852_length_63823_cov_10.906347_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerostipes;s__Anaerostipes hadrus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540861" accession="ERS11142827">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142827</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540861</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.1208</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Klebsiella phage P-K7R virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>474.464</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1208_length_51750_cov_271.664467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6533333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1208_length_51750_cov_271.664467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME239725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Phocaeicola;s__Phocaeicola plebeius_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.1208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Klebsiella phage P-K7R</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540862" accession="ERS11142828">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142828</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540862</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.141</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3032310127651904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540862</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.9752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_141_length_158221_cov_13.111854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5116279069767442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_141_length_158221_cov_13.111854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME260250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp900759995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540863" accession="ERS11142829">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142829</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540863</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.1533</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vectrevirus virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>5.538234328848e-4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540863</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1533_length_44243_cov_6.122266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8245614035087719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1533_length_44243_cov_6.122266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.2022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Molineuxvirinae; Vectrevirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540864" accession="ERS11142830">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142830</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540864</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.1661</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540864</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.01749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1661_length_42106_cov_4.424017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42105263157894735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1661_length_42106_cov_4.424017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540865" accession="ERS11142831">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142831</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540865</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.1821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.39311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1821_length_39381_cov_4.619199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746345_bin.282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_1821_length_39381_cov_4.619199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.1821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540866" accession="ERS11142832">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142832</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540866</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.2034</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.0593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_2034_length_36230_cov_18.653943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746345_bin.119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_2034_length_36230_cov_18.653943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.2034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540867" accession="ERS11142833">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142833</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540867</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.2595</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.68493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_2595_length_30413_cov_4.780030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_2595_length_30413_cov_4.780030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746345_virus.2595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540868" accession="ERS11142834">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142834</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540868</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.32229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540868</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>5.45158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_32229_length_3764_cov_2.806075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_32229_length_3764_cov_2.806075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME157422</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Eremiobacterota;c__Xenobia;o__Xenobiales;f__Xenobiaceae;g__Bruticola;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738227_virus.13835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540869" accession="ERS11142835">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142835</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540869</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.4560</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17129856875493005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.93286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_4560_length_20050_cov_4.341361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_4560_length_20050_cov_4.341361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738265_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;s__Clostridium sp900540255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738659_virus.2586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540870" accession="ERS11142836">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142836</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540870</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06414554127551365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>301.062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_745_length_68647_cov_172.360522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_745_length_68647_cov_172.360522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D berlinense</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540871" accession="ERS11142837">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142837</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540871</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746345_virus.937</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746345.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746345) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560644) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>300</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_937_length_60387_cov_19.578478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7555555555555555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_H_24_1651__NODE_937_length_60387_cov_19.578478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_virus.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540872" accession="ERS11142838">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142838</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540872</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_provirus.1108</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>339.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1108_length_70335_cov_212.828048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746349_bin.171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1108_length_70335_cov_212.828048_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540873" accession="ERS11142839">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142839</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540873</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_provirus.1694</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.2389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1694_length_54387_cov_5.617124_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1694_length_54387_cov_5.617124_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME080845</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella pectinovora</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738205_provirus.488</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540874" accession="ERS11142840">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142840</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540874</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_provirus.2903</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540874</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.3154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2903_length_38319_cov_24.730584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746349_bin.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2903_length_38319_cov_24.730584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_provirus.2903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540875" accession="ERS11142841">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142841</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540875</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_provirus.4724</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.4562</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_4724_length_27806_cov_23.730210_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746349_bin.470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_4724_length_27806_cov_23.730210_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME125875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFFH01;s__SFFH01 sp900542395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_provirus.4724</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540876" accession="ERS11142842">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142842</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540876</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_provirus.913</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>120.897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_913_length_77526_cov_76.244032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746349_bin.164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_913_length_77526_cov_76.244032_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900549865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738612_provirus.660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540877" accession="ERS11142843">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142843</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540877</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.1258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.2034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1258_length_65136_cov_32.547933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5172413793103449</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1258_length_65136_cov_32.547933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745582_virus.413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540878" accession="ERS11142844">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142844</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540878</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.1661</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>81.3022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1661_length_55147_cov_49.428545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_1661_length_55147_cov_49.428545</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.1661</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540879" accession="ERS11142845">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142845</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540879</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.195</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05247797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>92.8926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_195_length_156742_cov_56.263186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6078431372549019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_195_length_156742_cov_56.263186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540880" accession="ERS11142846">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142846</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540880</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.2599</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Aeromonas phage PZL-Ah8 virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540880</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.81003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2599_length_41336_cov_5.912989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2599_length_41336_cov_5.912989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.2599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae; Studiervirinae; unclassified Studiervirinae; Aeromonas phage PZL-Ah8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540881" accession="ERS11142847">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142847</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540881</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.2916</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.7683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2916_length_38203_cov_10.212322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_2916_length_38203_cov_10.212322</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.1554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540882" accession="ERS11142848">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142848</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540882</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.3124</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.6347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_3124_length_36538_cov_6.488467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_3124_length_36538_cov_6.488467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_provirus.1272893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540883" accession="ERS11142849">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142849</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540883</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.359</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.8326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_359_length_120565_cov_17.037016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.918918918918919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_359_length_120565_cov_17.037016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540884" accession="ERS11142850">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142850</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540884</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.4103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_4103_length_30575_cov_7.658830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_4103_length_30575_cov_7.658830</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.4103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540885" accession="ERS11142851">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142851</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540885</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_506_length_103001_cov_7.662877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6578947368421053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_506_length_103001_cov_7.662877</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME147678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_G</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738643_virus.497</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540886" accession="ERS11142852">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142852</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540886</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.63</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.0465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_63_length_246263_cov_38.912838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7017543859649122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_63_length_246263_cov_38.912838</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540887" accession="ERS11142853">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142853</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540887</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746349_virus.909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746349.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.78891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746349) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568042) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_909_length_77837_cov_5.870293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585730</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_2_1315__NODE_909_length_77837_cov_5.870293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738162_virus.273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540888" accession="ERS11142854">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142854</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540888</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.1070</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.6117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1070_length_62571_cov_15.617867_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1070_length_62571_cov_15.617867_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738265_virus.401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540889" accession="ERS11142855">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142855</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540889</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.1685</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540889</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1685_length_48891_cov_10.510325_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1685_length_48891_cov_10.510325_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.507</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540890" accession="ERS11142856">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142856</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540890</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.2181</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10914096916299568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540890</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>244.662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2181_length_41912_cov_145.968232_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2181_length_41912_cov_145.968232_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0283404</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540891" accession="ERS11142857">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142857</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540891</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.30</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.1701</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_30_length_304173_cov_52.689864_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746365_bin.201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.34782608695652173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_30_length_304173_cov_52.689864_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540892" accession="ERS11142858">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142858</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540892</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.414</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540892</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_414_length_103377_cov_5.456544_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746365_bin.173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_414_length_103377_cov_5.456544_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738664_provirus.351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540893" accession="ERS11142859">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142859</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540893</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_provirus.710</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.34035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_710_length_77279_cov_4.994858_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746365_bin.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_710_length_77279_cov_4.994858_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738574_bin.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Beduini;s__Beduini sp900550005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_provirus.710</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540894" accession="ERS11142860">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142860</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540894</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.1009</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>137.947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1009_length_64375_cov_75.891319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1009_length_64375_cov_75.891319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Mediterraneibacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.1009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540895" accession="ERS11142861">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142861</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540895</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.1258</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>353</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1258_length_57486_cov_9.336132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1258_length_57486_cov_9.336132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738575_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__CAG-279;s__CAG-279 sp000437795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738234_virus.618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540896" accession="ERS11142862">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142862</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540896</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.1527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540896</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.1045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1527_length_52100_cov_31.182362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1527_length_52100_cov_31.182362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745425_virus.296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540897" accession="ERS11142863">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142863</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540897</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.1909</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540897</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>203.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1909_length_45569_cov_117.186824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_1909_length_45569_cov_117.186824</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738212_provirus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540898" accession="ERS11142864">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142864</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540898</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.2155</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Toutatis virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.8038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2155_length_42175_cov_15.135090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7948717948717948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2155_length_42175_cov_15.135090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.1312</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Toutatisvirus; Faecalibacterium virus Toutatis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540899" accession="ERS11142865">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142865</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540899</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.2396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540899</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.8031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2396_length_39374_cov_18.660279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8529411764705882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2396_length_39374_cov_18.660279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745582_virus.1002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540900" accession="ERS11142866">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142866</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540900</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.2733</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vegasvirus virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.3586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2733_length_36452_cov_10.903038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2733_length_36452_cov_10.903038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.2733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Gochnauervirinae; Vegasvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540901" accession="ERS11142867">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142867</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540901</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.2875</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2875_length_35320_cov_14.902278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746365_bin.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_2875_length_35320_cov_14.902278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745975_bin.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__UMGS124;g__UMGS124;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.1554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540902" accession="ERS11142868">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142868</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540902</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.3831</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.4808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_3831_length_28773_cov_24.804363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7727272727272727</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_3831_length_28773_cov_24.804363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_provirus.470</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540903" accession="ERS11142869">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142869</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540903</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00991189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>175.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_462_length_98060_cov_98.043130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.922077922077922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_462_length_98060_cov_98.043130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540904" accession="ERS11142870">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142870</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540904</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.55</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540904</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>137.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_55_length_234872_cov_78.652126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7931034482758621</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_55_length_234872_cov_78.652126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738630_virus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540905" accession="ERS11142871">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142871</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540905</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.7202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04666904456842997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.30781</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_7202_length_18041_cov_3.877199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_7202_length_18041_cov_3.877199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.7202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540906" accession="ERS11142872">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142872</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540906</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746365_virus.9605</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7746365.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18368843659633977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>382.605</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746365) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568022) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_9605_length_14370_cov_215.009935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-04-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_E_16_1654__NODE_9605_length_14370_cov_215.009935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738191_virus.3624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540907" accession="ERS11142873">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142873</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540907</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_provirus.1677</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540907</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.24174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1677_length_41181_cov_4.849455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746398_bin.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1677_length_41181_cov_4.849455_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746113_bin.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900541925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745291_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540908" accession="ERS11142874">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142874</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540908</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_provirus.328</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>82.4849</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_328_length_104510_cov_48.995863_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.46875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_328_length_104510_cov_48.995863_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738177_provirus.254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540909" accession="ERS11142875">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142875</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540909</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_provirus.944</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.9687</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_944_length_59662_cov_14.171872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_944_length_59662_cov_14.171872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745967_virus.496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540910" accession="ERS11142876">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142876</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540910</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.1441</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18243392070484585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.4176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1441_length_45440_cov_9.258823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6341463414634146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1441_length_45440_cov_9.258823</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7747782_bin.309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Duodenibacillus;s__Duodenibacillus sp900544255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540911" accession="ERS11142877">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142877</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540911</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.1786</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.4176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1786_length_39413_cov_13.582520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1786_length_39413_cov_13.582520</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1786</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540912" accession="ERS11142878">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142878</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540912</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.1956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540912</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>190.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1956_length_37124_cov_112.960888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_1956_length_37124_cov_112.960888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME079077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000434935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738230_virus.458</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540913" accession="ERS11142879">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142879</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540913</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.2769</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.23525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_2769_length_28579_cov_4.762087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_2769_length_28579_cov_4.762087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME024263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Tannerellaceae;g__Parabacteroides;s__Parabacteroides sp900549585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0328206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540914" accession="ERS11142880">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142880</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540914</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.356</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03997797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>611.472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_356_length_100633_cov_360.364901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.927536231884058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_356_length_100633_cov_360.364901</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540915" accession="ERS11142881">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142881</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540915</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.4801</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03588928786936091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.47474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_4801_length_18084_cov_3.697229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_4801_length_18084_cov_3.697229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;s__CAG-353 sp900066885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.4801</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540916" accession="ERS11142882">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142882</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540916</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746398_virus.738</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746398.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.0066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746398) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559822) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_738_length_68350_cov_36.387445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_7_2399__NODE_738_length_68350_cov_36.387445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.738</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540917" accession="ERS11142883">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142883</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540917</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.130</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540917</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.6298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_130_length_235648_cov_49.642787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746411_bin.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_130_length_235648_cov_49.642787_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540919" accession="ERS11142884">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142884</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540919</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.182</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Brigit provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.4096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_182_length_203798_cov_33.876355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5121951219512195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_182_length_203798_cov_33.876355_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Negativibacillus;s__Negativibacillus sp900547015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Brigitvirus; Faecalibacterium virus Brigit</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540920" accession="ERS11142885">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142885</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540920</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.2440</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540920</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_2440_length_55703_cov_8.599971_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_2440_length_55703_cov_8.599971_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540921" accession="ERS11142886">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142886</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540921</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.3191</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>150.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3191_length_46011_cov_93.173575_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3191_length_46011_cov_93.173575_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_virus.1407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540922" accession="ERS11142887">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142887</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540922</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.4748</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540922</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2318</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4748_length_35086_cov_13.897484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4748_length_35086_cov_13.897484_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540923" accession="ERS11142888">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142888</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540923</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.714</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_714_length_107463_cov_13.056749_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6226415094339622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_714_length_107463_cov_13.056749_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_provirus.714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540924" accession="ERS11142889">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142889</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540924</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_provirus.983</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540924</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_983_length_91439_cov_6.626519_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746411_bin.521</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_983_length_91439_cov_6.626519_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-510;s__CAG-510 sp000434615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738594_virus.1978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540925" accession="ERS11142890">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142890</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540925</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.11925</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.3923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_11925_length_16513_cov_24.574653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_11925_length_16513_cov_24.574653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.4145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540926" accession="ERS11142891">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142891</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540926</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.14274</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11941492641683568</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>98.0921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_14274_length_14064_cov_57.544799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_14274_length_14064_cov_57.544799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.7389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540927" accession="ERS11142892">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142892</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540927</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540927</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>58.7347</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_162_length_212516_cov_36.401240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5409836065573771</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_162_length_212516_cov_36.401240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540928" accession="ERS11142893">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142893</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540928</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.1979</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_1979_length_63398_cov_6.076910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_1979_length_63398_cov_6.076910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738214_virus.237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540929" accession="ERS11142894">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142894</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540929</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.2308</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Lambdavirus virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540929</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.0245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_2308_length_57707_cov_34.049193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_2308_length_57707_cov_34.049193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.2308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lambdavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540930" accession="ERS11142895">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142895</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540930</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.256</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_256_length_177750_cov_12.692255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_256_length_177750_cov_12.692255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540931" accession="ERS11142896">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142896</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540931</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.3008</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>181.414</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3008_length_48061_cov_123.616664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5416666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3008_length_48061_cov_123.616664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_virus.1324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540932" accession="ERS11142897">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142897</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540932</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.3286</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540932</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-28T19:46:24Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.0013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3286_length_45152_cov_11.321597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3286_length_45152_cov_11.321597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540933" accession="ERS11142898">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142898</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540933</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.3667</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08480176211453745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>42.1666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3667_length_41972_cov_25.061320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5454545454545454</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3667_length_41972_cov_25.061320</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Thermoplasmatota;c__Thermoplasmata;o__Methanomassiliicoccales;f__Methanomethylophilaceae;g__UBA71;s__UBA71 sp006954465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540934" accession="ERS11142899">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142899</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540934</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.3951</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3951_length_39972_cov_5.875473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_3951_length_39972_cov_5.875473</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.3951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540935" accession="ERS11142900">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142900</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540935</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.4266</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:31Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.1246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4266_length_37960_cov_11.999155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5945945945945946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4266_length_37960_cov_11.999155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.4266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540936" accession="ERS11142901">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142901</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540936</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.4708</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540936</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>557</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4708_length_35274_cov_8.772736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7906976744186046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_4708_length_35274_cov_8.772736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540937" accession="ERS11142902">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142902</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540937</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.5612</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>400.447</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_5612_length_31171_cov_238.480543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_5612_length_31171_cov_238.480543</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME099131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000435075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_virus.1434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540938" accession="ERS11142903">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142903</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540938</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.6309</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11513032049760293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.4514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_6309_length_28306_cov_14.150873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_6309_length_28306_cov_14.150873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738188_virus.809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540939" accession="ERS11142904">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142904</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540939</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.7075</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2330383012181856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540939</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.68737</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_7075_length_25624_cov_4.228363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_7075_length_25624_cov_4.228363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738624_bin.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Thermoplasmatota;c__Thermoplasmata;o__Methanomassiliicoccales;f__Methanomethylophilaceae;g__ISO4-G1;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.1988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540940" accession="ERS11142905">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142905</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540940</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.7942</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.407</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_7942_length_23296_cov_12.133554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_7942_length_23296_cov_12.133554</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME091217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__UMGS973;s__UMGS973 sp900547295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738266_virus.1785</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540941" accession="ERS11142906">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142906</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540941</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746411_virus.89716</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7746411.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>4.62107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746411) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4568013) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_89716_length_2409_cov_2.404374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_A_2_1112__NODE_89716_length_2409_cov_2.404374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Anaerovibrio;s__Anaerovibrio sp900548165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745545_virus.11888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540942" accession="ERS11142907">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142907</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540942</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_provirus.202</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_202_length_104068_cov_6.794578_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746433_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9767441860465116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_202_length_104068_cov_6.794578_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746433_provirus.202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540943" accession="ERS11142908">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142908</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540943</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_provirus.434</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_434_length_72104_cov_21.983950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5769230769230769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_434_length_72104_cov_21.983950_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745329_provirus.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540944" accession="ERS11142909">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142909</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540944</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_provirus.893</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05743392070484582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.4699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_893_length_46023_cov_28.631611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_893_length_46023_cov_28.631611_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Veillonellales;f__Dialisteraceae;g__Dialister;s__Dialister sp000434475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_provirus.19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540945" accession="ERS11142910">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142910</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540945</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_virus.1618</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06133297914365541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4365</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_1618_length_29002_cov_7.559343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_1618_length_29002_cov_7.559343</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738536_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;s__UBA1777 sp900546515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738623_virus.2878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540946" accession="ERS11142911">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142911</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540946</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_virus.2204</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 virus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15446967015660706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.2817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_2204_length_22366_cov_29.321055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_2204_length_22366_cov_29.321055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745687_bin.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6382;s__UBA6382 sp900557555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.2946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540947" accession="ERS11142912">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142912</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540947</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746433_virus.587</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7746433.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746433) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559630) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_587_length_61075_cov_9.611413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_18_2327__NODE_587_length_61075_cov_9.611413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738141_virus.327</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540948" accession="ERS11142913">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142913</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540948</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_provirus.277</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07349942002795122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.7445</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_277_length_74068_cov_9.650728_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746436_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_277_length_74068_cov_9.650728_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747128_provirus.718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540949" accession="ERS11142914">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142914</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540949</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_provirus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540949</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.4536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_850_length_37515_cov_17.889551_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746436_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_850_length_37515_cov_17.889551_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738224_virus.1626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540950" accession="ERS11142915">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142915</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540950</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_virus.166</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540950</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>61.5252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_166_length_97310_cov_37.823249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_166_length_97310_cov_37.823249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_provirus.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540951" accession="ERS11142916">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142916</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540951</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_virus.288</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_288_length_72460_cov_10.815178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.71875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_288_length_72460_cov_10.815178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-138;g__RUG472;s__RUG472 sp900545265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.1153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540952" accession="ERS11142917">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142917</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540952</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_virus.489</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>30.4757</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_489_length_52787_cov_18.406450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_489_length_52787_cov_18.406450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME011266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__CAG-272;g__UMGS1696;s__UMGS1696 sp900554225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540953" accession="ERS11142918">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142918</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540953</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_virus.635</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.2675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_635_length_45165_cov_10.421664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_635_length_45165_cov_10.421664</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.1068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540954" accession="ERS11142919">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142919</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540954</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746436_virus.757</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746436.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540954</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.6842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746436) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559800) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_757_length_40301_cov_18.019541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_G_5_2395__NODE_757_length_40301_cov_18.019541</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738205_virus.821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540955" accession="ERS11142920">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142920</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540955</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746440_provirus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746440.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15966643118087134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746440) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_162_length_124872_cov_8.298305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746440_bin.198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_162_length_124872_cov_8.298305_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737966_provirus.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540956" accession="ERS11142921">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142921</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540956</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746440_provirus.752</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746440.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540956</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.3826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746440) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_752_length_50552_cov_12.820129_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_752_length_50552_cov_12.820129_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_provirus.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540957" accession="ERS11142922">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142922</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540957</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746440_virus.1641</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746440.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.3197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746440) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_1641_length_29808_cov_9.426693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_1641_length_29808_cov_9.426693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738550_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Muribaculaceae;g__Sodaliphilus;s__Sodaliphilus sp004557565</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746440_virus.1641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540958" accession="ERS11142923">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142923</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540958</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746440_virus.265</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured CrAss-like virus sp. virus assembled from ERR7746440.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.9706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746440) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_265_length_92480_cov_12.067963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_265_length_92480_cov_12.067963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; CrAss-like virus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540959" accession="ERS11142924">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142924</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540959</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746440_virus.5979</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7746440.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>116.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746440) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559632) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_5979_length_10870_cov_71.705921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_8_2320__NODE_5979_length_10870_cov_71.705921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745890_virus.12584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540960" accession="ERS11142925">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142925</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540960</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_provirus.1300</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540960</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.15092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1300_length_38118_cov_4.660156_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746465_bin.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1300_length_38118_cov_4.660156_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.1837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540961" accession="ERS11142926">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142926</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540961</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_provirus.303</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04801252640094042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540961</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>319.817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_303_length_89576_cov_224.140460_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_303_length_89576_cov_224.140460_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.2119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540962" accession="ERS11142927">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142927</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540962</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_provirus.870</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_870_length_50513_cov_16.618150_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_870_length_50513_cov_16.618150_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745828_virus.992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540963" accession="ERS11142928">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142928</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540963</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_virus.1235</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13363893334599836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540963</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.22843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1235_length_39594_cov_5.170104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1235_length_39594_cov_5.170104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747128_virus.831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540964" accession="ERS11142929">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142929</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540964</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_virus.1740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540964</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.4861</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1740_length_30595_cov_9.562553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746465_bin.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_1740_length_30595_cov_9.562553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540965" accession="ERS11142930">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142930</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540965</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_virus.251</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5284</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_251_length_96119_cov_6.604413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6551724137931034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_251_length_96119_cov_6.604413</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter rectalis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738186_virus.233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540966" accession="ERS11142931">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142931</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540966</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_virus.538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7884</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_538_length_65307_cov_6.142250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_538_length_65307_cov_6.142250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540967" accession="ERS11142932">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142932</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540967</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746465_virus.8467</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746465.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5729</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746465) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559825) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_8467_length_7045_cov_8.961682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_15_2475__NODE_8467_length_7045_cov_8.961682</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746030_virus.7705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540968" accession="ERS11142933">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142933</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540968</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746660_provirus.394</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.82822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_394_length_44893_cov_4.850098_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746660_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_394_length_44893_cov_4.850098_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-628;s__CAG-628 sp000438415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746660_provirus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540969" accession="ERS11142934">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142934</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540969</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746660_virus.131</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr271_1 virus assembled from ERR7746660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10247797356828196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>53.5617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_131_length_91734_cov_30.130988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.873015873015873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_131_length_91734_cov_30.130988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738184_virus.90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr271_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540970" accession="ERS11142935">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142935</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540970</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746660_virus.564</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746660.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540970</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>31.6606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746660) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561498) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_564_length_35869_cov_16.716138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.42857142857142855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585779</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_1_1524__NODE_564_length_35869_cov_16.716138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746660_virus.564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540971" accession="ERS11142936">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142936</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540971</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.1354</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11445538818076476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1354_length_68429_cov_10.443732_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5263157894736842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1354_length_68429_cov_10.443732_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.1354</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540972" accession="ERS11142937">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142937</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540972</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.1952</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.4251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1952_length_55276_cov_14.042102_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746688_bin.419</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7307692307692307</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1952_length_55276_cov_14.042102_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0250744</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540973" accession="ERS11142938">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142938</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540973</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.2349</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540973</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2349_length_48946_cov_10.485584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746688_bin.276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2349_length_48946_cov_10.485584_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745890_bin.370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__QAMH01;g__W2P13-069;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.2151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540974" accession="ERS11142939">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142939</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540974</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540974</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>94.4789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_290_length_154564_cov_58.549671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_290_length_154564_cov_58.549671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.290</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540975" accession="ERS11142940">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142940</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540975</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.40</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_40_length_334446_cov_32.154018_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746688_bin.424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_40_length_334446_cov_32.154018_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540976" accession="ERS11142941">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142941</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540976</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.540</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_540_length_113659_cov_11.282483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_540_length_113659_cov_11.282483_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746111_virus.311</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540977" accession="ERS11142942">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142942</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540977</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_provirus.7260</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_7260_length_22006_cov_10.665101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_7260_length_22006_cov_10.665101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_provirus.7260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540978" accession="ERS11142943">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142943</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540978</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.1034</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Andromedavirus virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540978</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.7408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1034_length_80152_cov_37.253450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8108108108108109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1034_length_80152_cov_37.253450</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.1034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Andromedavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540979" accession="ERS11142944">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142944</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540979</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.1503</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage LoVEphage virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1503_length_64098_cov_6.933959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5384615384615384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1503_length_64098_cov_6.933959</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.1503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Bacteroides phage LoVEphage</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540980" accession="ERS11142945">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142945</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540980</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.1678</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>91.8946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1678_length_60490_cov_52.028818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_1678_length_60490_cov_52.028818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME026639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900548625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540981" accession="ERS11142946">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142946</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540981</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.18025</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540981</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>45.2251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_18025_length_10521_cov_26.015380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_18025_length_10521_cov_26.015380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME023798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900543155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738260_virus.5572</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540983" accession="ERS11142948">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142948</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540983</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.2290</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.1364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2290_length_49777_cov_48.393239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2290_length_49777_cov_48.393239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738148_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Selenomonadales;f__Selenomonadaceae;g__Selenomonas_A;s__Selenomonas_A sp900769615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540984" accession="ERS11142949">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142949</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540984</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.2534</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8574339207048457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.5015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2534_length_46670_cov_14.390638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2534_length_46670_cov_14.390638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Anaerotignaceae;g__Anaerotignum;s__Anaerotignum faecicola</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540985" accession="ERS11142950">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142950</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540985</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.2826</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>444.933</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>78</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2826_length_43612_cov_250.180085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5652173913043478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2826_length_43612_cov_250.180085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745823_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__SFJ001;s__SFJ001 sp004555865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540986" accession="ERS11142951">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142951</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540986</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.302</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>66.227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_302_length_152663_cov_37.805264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5873015873015873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_302_length_152663_cov_37.805264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738579_virus.137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540987" accession="ERS11142952">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142952</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540987</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.3259</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.7512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3259_length_39477_cov_15.416751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3259_length_39477_cov_15.416751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__UBA9502;s__UBA9502 sp003506385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.3259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540988" accession="ERS11142953">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142953</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540988</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.3455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>115.805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3455_length_37946_cov_67.595078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3455_length_37946_cov_67.595078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME005405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus infantis_I</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.3455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540989" accession="ERS11142954">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142954</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540989</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.3675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.1597</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3675_length_36445_cov_11.525022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7419354838709677</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_3675_length_36445_cov_11.525022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME103816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;s__Dorea_A longicatena</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.3675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540990" accession="ERS11142955">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142955</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540990</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.38935</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00743392070484581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540990</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>123.788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_38935_length_5253_cov_72.074189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_38935_length_5253_cov_72.074189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger sp004555405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738158_virus.17872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540992" accession="ERS11142957">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142957</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540992</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.476</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>64.2391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_476_length_122037_cov_35.687693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_476_length_122037_cov_35.687693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.476</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540993" accession="ERS11142958">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142958</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540993</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.6003</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540993</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_6003_length_25484_cov_6.923604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_6003_length_25484_cov_6.923604</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME157782</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp900547745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746365_virus.2065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540994" accession="ERS11142959">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142959</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540994</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.6355</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.5438</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_6355_length_24388_cov_32.113817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_6355_length_24388_cov_32.113817</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME095879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus sp900543065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.6355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540995" accession="ERS11142960">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142960</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540995</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.7581</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.735900000000001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_7581_length_21250_cov_6.520569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_7581_length_21250_cov_6.520569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.7581</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540996" accession="ERS11142961">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142961</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540996</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.905</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_905_length_86567_cov_10.103573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_905_length_86567_cov_10.103573</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540998" accession="ERS11142963">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142963</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540998</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746692_provirus.784</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746692.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.3666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746692) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561431) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_784_length_35407_cov_6.353071_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746692_bin.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.93</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_784_length_35407_cov_6.353071_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745433_virus.1725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540999" accession="ERS11142964">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142964</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540999</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746692_virus.185</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746692.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.2825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746692) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561431) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_185_length_85703_cov_10.649102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.717391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_185_length_85703_cov_10.649102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium_P;s__Clostridium_P ventriculi</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738245_virus.379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541000" accession="ERS11142965">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142965</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541000</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746692_virus.42</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746692.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746692) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561431) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_42_length_181224_cov_14.845750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6216216216216216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_42_length_181224_cov_14.845750</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746692_virus.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541001" accession="ERS11142966">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142966</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541001</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746692_virus.647</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746692.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746692) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561431) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_647_length_39480_cov_13.063828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.868421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_647_length_39480_cov_13.063828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745411_provirus.835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541002" accession="ERS11142967">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142967</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541002</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746721_provirus.365</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746721.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10860324158549196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746721) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_365_length_46376_cov_10.016696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7916666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_365_length_46376_cov_10.016696_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746184_virus.2079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541003" accession="ERS11142968">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142968</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541003</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746721_virus.1044</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746721.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08243392070484583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541003</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746721) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_1044_length_26030_cov_5.169691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_1044_length_26030_cov_5.169691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738580_provirus.733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541004" accession="ERS11142969">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142969</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541004</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746721_virus.180</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7746721.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>83.5836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746721) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_180_length_67606_cov_42.472168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_180_length_67606_cov_42.472168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738244_provirus.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541006" accession="ERS11142971">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142971</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541006</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746721_virus.7865</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microvirus sp. virus assembled from ERR7746721.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.59039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746721) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_7865_length_6605_cov_3.508578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_7865_length_6605_cov_3.508578</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746721_virus.7865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microvirus sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541007" accession="ERS11142972">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142972</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541007</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746738_provirus.69</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746738.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541007</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.8025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746738) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561505) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_69_length_172153_cov_41.672616_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746738_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_69_length_172153_cov_41.672616_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745835_virus.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541008" accession="ERS11142973">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142973</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541008</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746738_virus.1611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746738.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.16497797356828195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541008</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746738) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561505) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_1611_length_20996_cov_6.949233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_1611_length_20996_cov_6.949233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.1518</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541009" accession="ERS11142974">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142974</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541009</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746738_virus.504</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746738.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541009</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.2895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746738) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561505) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_504_length_55111_cov_9.441999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_504_length_55111_cov_9.441999</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541010" accession="ERS11142975">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142975</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541010</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746738_virus.745</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746738.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541010</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.5938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746738) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561505) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_745_length_41047_cov_24.396363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6538461538461539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_9_1519__NODE_745_length_41047_cov_24.396363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME115165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__F23-B02;s__F23-B02 sp002472405</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746738_virus.745</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541012" accession="ERS11142977">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142977</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541012</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746739_virus.1367</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746739.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18992024567263308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541012</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.0043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746739) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561457) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_1367_length_24811_cov_13.124768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746739_bin.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_1367_length_24811_cov_13.124768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746030_bin.308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__UBA1081;s__UBA1081 sp900543395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746739_virus.1367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541013" accession="ERS11142978">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142978</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541013</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746739_virus.461</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746739.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541013</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.0576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746739) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561457) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_461_length_52461_cov_13.751069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_461_length_52461_cov_13.751069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_virus.850</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541014" accession="ERS11142979">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142979</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541014</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_provirus.10</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.8292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_10_length_343173_cov_22.539374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7714285714285715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_10_length_343173_cov_22.539374_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_provirus.501</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541015" accession="ERS11142980">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142980</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541015</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_provirus.358</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>309</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_358_length_81260_cov_6.992806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7555555555555555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_358_length_81260_cov_6.992806_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745544_provirus.252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541016" accession="ERS11142981">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142981</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541016</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_provirus.76</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.19493392070484583</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.3266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>406</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_76_length_168089_cov_10.393496_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746741_bin.126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_76_length_168089_cov_10.393496_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541017" accession="ERS11142982">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142982</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541017</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1054</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.3511</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1054_length_45223_cov_13.611593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746741_bin.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1054_length_45223_cov_13.611593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745947_virus.1976</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541018" accession="ERS11142983">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142983</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541018</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1178</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Parabacteroides phage PDS1 virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4629947229551451</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>3286.77</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1178_length_42218_cov_1780.142711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1178_length_42218_cov_1780.142711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0280262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Parabacteroides phage PDS1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541019" accession="ERS11142984">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142984</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541019</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1259</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.4787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>566</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1259_length_40674_cov_52.829593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1259_length_40674_cov_52.829593</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541021" accession="ERS11142986">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142986</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541021</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1539</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541021</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1539_length_35934_cov_5.861199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1539_length_35934_cov_5.861199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_C</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745854_virus.2645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541022" accession="ERS11142987">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142987</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541022</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1814</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.1556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1814_length_31705_cov_15.800209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746741_bin.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6956521739130435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1814_length_31705_cov_15.800209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746365_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UBA5884;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738534_virus.1900</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541023" accession="ERS11142988">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142988</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541023</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.2152</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.9539</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_2152_length_28033_cov_27.803441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_2152_length_28033_cov_27.803441</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME019335</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900546535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.1358</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541024" accession="ERS11142989">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142989</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541024</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.272</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541024</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>184.402</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_272_length_94048_cov_102.421162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.56</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_272_length_94048_cov_102.421162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541025" accession="ERS11142990">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142990</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541025</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.410</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>294.073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_410_length_76576_cov_163.626505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_410_length_76576_cov_163.626505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.410</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541027" accession="ERS11142992">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142992</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541027</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.828</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>76.5079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_828_length_51865_cov_44.879142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4838709677419355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_828_length_51865_cov_44.879142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738147_virus.298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541028" accession="ERS11142993">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142993</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541028</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.991</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>169.058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_991_length_47148_cov_93.969259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_991_length_47148_cov_93.969259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746465_virus.878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541029" accession="ERS11142994">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142994</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541029</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_provirus.494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.1431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_494_length_56977_cov_7.809982_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_494_length_56977_cov_7.809982_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738577_virus.832</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541030" accession="ERS11142995">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142995</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541030</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.1135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541030</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.8367</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_1135_length_33750_cov_11.056930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_1135_length_33750_cov_11.056930</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.1135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541031" accession="ERS11142996">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142996</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541031</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.179</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.7435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_179_length_100662_cov_20.864483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9479166666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_179_length_100662_cov_20.864483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541032" accession="ERS11142997">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142997</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541032</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>303.628</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_243_length_87714_cov_172.847439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_243_length_87714_cov_172.847439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541033" accession="ERS11142998">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142998</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541033</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.3361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06822090574375078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541033</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.2994</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_3361_length_15634_cov_11.529665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_3361_length_15634_cov_11.529665</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.3361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541035" accession="ERS11143000">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143000</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541035</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9294333617490468</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.2191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_770_length_43332_cov_18.272570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6764705882352942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_770_length_43332_cov_18.272570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541036" accession="ERS11143001">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143001</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541036</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.844</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Viruses virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>169.437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_844_length_40536_cov_94.559381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_844_length_40536_cov_94.559381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738626_virus.2057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541037" accession="ERS11143002">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143002</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541037</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_provirus.190</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541037</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_190_length_130128_cov_6.437667_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746748_bin.114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_190_length_130128_cov_6.437667_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-533;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.1871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541038" accession="ERS11143003">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143003</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541038</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_provirus.636</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.4691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_636_length_69768_cov_52.982824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_636_length_69768_cov_52.982824_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_virus.962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541039" accession="ERS11143004">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143004</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541039</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_virus.1220</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541039</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.9227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_1220_length_43164_cov_9.220206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6190476190476191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_1220_length_43164_cov_9.220206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-110;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.1220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541041" accession="ERS11143006">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143006</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541041</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_virus.215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.9895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_215_length_123480_cov_15.263430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5945945945945946</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_215_length_123480_cov_15.263430</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738253_virus.287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541042" accession="ERS11143007">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143007</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541042</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_virus.324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09801627478646756</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>251.914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_324_length_100132_cov_146.559462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9090909090909092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_324_length_100132_cov_146.559462</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541043" accession="ERS11143008">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143008</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541043</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_virus.713</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>71.4217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_713_length_64459_cov_40.998478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_713_length_64459_cov_40.998478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745432_virus.483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541044" accession="ERS11143009">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143009</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541044</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_provirus.20</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.4368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_20_length_301549_cov_17.927101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746751_bin.102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_20_length_301549_cov_17.927101_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738192_bin.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Sphaerochaetales;f__Sphaerochaetaceae;g__UBA9732;s__UBA9732 sp001940825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_provirus.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541045" accession="ERS11143010">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143010</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541045</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1063</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15863157861825958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.6998</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1063_length_53163_cov_13.426591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1063_length_53163_cov_13.426591</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746349_virus.1755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541046" accession="ERS11143011">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143011</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541046</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1215</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.91056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1215_length_48406_cov_4.634836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1215_length_48406_cov_4.634836</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541047" accession="ERS11143012">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143012</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541047</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1383</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541047</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.3647</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1383_length_43653_cov_8.506196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1383_length_43653_cov_8.506196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541048" accession="ERS11143013">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143013</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541048</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1563</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.0493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1563_length_39583_cov_11.016453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7619047619047619</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1563_length_39583_cov_11.016453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745963_bin.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFTH01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1563</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541050" accession="ERS11143015">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143015</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541050</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1772</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541050</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1020.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1772_length_36238_cov_619.285252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1772_length_36238_cov_619.285252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME026639</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__CAG-170 sp900548625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541051" accession="ERS11143016">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143016</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541051</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.2265</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2678713013780906</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_2265_length_29168_cov_7.020728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_2265_length_29168_cov_7.020728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738167_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__CAG-74;g__SFHK01;s__SFHK01 sp004556395</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738602_provirus.644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541052" accession="ERS11143017">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143017</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541052</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.2902</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.02938</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_2902_length_23192_cov_4.036816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_2902_length_23192_cov_4.036816</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745908_bin.146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Burkholderiales;f__Burkholderiaceae;g__Duodenibacillus;s__Duodenibacillus massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745870_virus.1535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541053" accession="ERS11143018">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143018</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541053</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.423</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541053</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>190.357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_423_length_93187_cov_110.823220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9333333333333332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_423_length_93187_cov_110.823220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__ML615J-28;f__CAG-698;g__CAG-698;s__CAG-698 sp000431235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541054" accession="ERS11143019">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143019</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541054</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.5264</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salmonella phage assan virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18068787764054067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541054</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_5264_length_13215_cov_9.395951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5714285714285714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_5264_length_13215_cov_9.395951</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__TF01-11;s__TF01-11 sp003529475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.5264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus; Salmonella phage assan</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541056" accession="ERS11143021">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143021</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541056</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.92</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541056</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>181.189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_92_length_192685_cov_106.686493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6785714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_92_length_192685_cov_106.686493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME025676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-177;s__CAG-177 sp003514385</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738532_virus.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541057" accession="ERS11143022">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143022</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541057</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_provirus.210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.1083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_210_length_120312_cov_19.034740_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746757_bin.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8076923076923077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_210_length_120312_cov_19.034740_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738598_bin.181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-492;s__CAG-492 sp900557045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_provirus.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541058" accession="ERS11143023">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143023</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541058</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_provirus.552</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541058</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>128.242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_552_length_68377_cov_89.201523_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_552_length_68377_cov_89.201523_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME033251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella hominis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1726542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541059" accession="ERS11143024">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143024</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541059</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.1132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541059</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.2759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1132_length_43374_cov_23.459339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1132_length_43374_cov_23.459339</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738243_virus.814</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541060" accession="ERS11143025">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143025</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541060</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.1436</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.6772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1436_length_35663_cov_16.468246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1436_length_35663_cov_16.468246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.2855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541061" accession="ERS11143026">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143026</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541061</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.1830</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.23602</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1830_length_29364_cov_4.092840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_1830_length_29364_cov_4.092840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0272453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541062" accession="ERS11143027">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143027</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541062</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.2398</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15446967015660706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>50.1175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_2398_length_23650_cov_30.106775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_2398_length_23650_cov_30.106775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0203435</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541064" accession="ERS11143029">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143029</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541064</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.690</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr131_1 virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.04741189427312775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.05313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_690_length_60233_cov_5.390834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_690_length_60233_cov_5.390834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME014888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900543975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745588_virus.330</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade; crAssphage cr131_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541065" accession="ERS11143030">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143030</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541065</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.97</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541065</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.2765</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_97_length_183948_cov_22.538160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.987012987012987</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_97_length_183948_cov_22.538160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738536_bin.336</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Terrisporobacter;s__Terrisporobacter sp900557165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746757_virus.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541066" accession="ERS11143031">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143031</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541066</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_provirus.36</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Tetrasphaera virus TJE1 provirus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>36.7025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_36_length_203734_cov_22.490928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746767_bin.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_36_length_203734_cov_22.490928_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_provirus.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Tijeunavirus; Tetrasphaera virus TJE1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541067" accession="ERS11143032">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143032</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541067</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_provirus.850</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8819113353173286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.1534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_850_length_52203_cov_9.744600_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746767_bin.10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_850_length_52203_cov_9.744600_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746094_virus.2712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541068" accession="ERS11143033">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143033</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541068</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_virus.1460</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541068</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.586</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_1460_length_38221_cov_8.614042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6428571428571429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_1460_length_38221_cov_8.614042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_provirus.883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541070" accession="ERS11143035">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143035</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541070</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_virus.2370</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.9722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>306</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_2370_length_28061_cov_4.390294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>78.02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_2370_length_28061_cov_4.390294</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541071" accession="ERS11143036">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143036</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541071</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_virus.755</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541071</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.4017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_755_length_55524_cov_15.334734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9767441860465116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_755_length_55524_cov_15.334734</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.755</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541072" accession="ERS11143037">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143037</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541072</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746768_provirus.133</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746768.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>409.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746768) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_5_1268__NODE_133_length_88816_cov_231.634422_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_5_1268__NODE_133_length_88816_cov_231.634422_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746768_provirus.133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541073" accession="ERS11143038">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143038</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541073</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746768_virus.293</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bifidobacterium phage Bbif-1 virus assembled from ERR7746768.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541073</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>488.048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746768) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561218) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_5_1268__NODE_293_length_41986_cov_281.858622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_5_1268__NODE_293_length_41986_cov_281.858622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Actinomycetia;o__Actinomycetales;f__Bifidobacteriaceae;g__Bifidobacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745621_virus.292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Bifidobacterium phage Bbif-1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541074" accession="ERS11143039">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143039</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541074</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_provirus.1396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1396_length_35033_cov_5.635771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4583333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1396_length_35033_cov_5.635771_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0362748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541075" accession="ERS11143040">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143040</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541075</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_provirus.513</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.2515</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_513_length_68268_cov_5.748706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746773_bin.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6071428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_513_length_68268_cov_5.748706_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_provirus.513</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541076" accession="ERS11143041">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143041</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541076</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.1151</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1151_length_40554_cov_13.960941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1151_length_40554_cov_13.960941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.708</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541077" accession="ERS11143042">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143042</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541077</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.1375</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>75.8453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1375_length_35490_cov_42.305735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_1375_length_35490_cov_42.305735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746285_provirus.191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541079" accession="ERS11143044">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143044</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541079</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.3360</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.1614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_3360_length_17034_cov_18.506752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_3360_length_17034_cov_18.506752</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746144_virus.688</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541080" accession="ERS11143045">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143045</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541080</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.574</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6954286436599909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541080</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.5192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_574_length_63785_cov_12.839251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_574_length_63785_cov_12.839251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738273_virus.972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541081" accession="ERS11143046">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143046</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541081</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541081</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_80_length_173936_cov_19.957466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6101694915254238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_80_length_173936_cov_19.957466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746411_virus.256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541082" accession="ERS11143047">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143047</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541082</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_provirus.1465</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541082</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_1465_length_39255_cov_6.913089_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_1465_length_39255_cov_6.913089_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541083" accession="ERS11143048">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143048</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541083</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_provirus.338</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.7506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_338_length_99082_cov_8.478047_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746779_bin.50</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7692307692307693</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_338_length_99082_cov_8.478047_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME107760</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000434975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.1837</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541085" accession="ERS11143050">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143050</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541085</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_virus.1330</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541085</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.6768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_1330_length_42056_cov_21.235308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5789473684210527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_1330_length_42056_cov_21.235308</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745383_virus.314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541086" accession="ERS11143051">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143051</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541086</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_virus.2536</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11227973568281942</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.1378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_2536_length_23988_cov_3.866087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_2536_length_23988_cov_3.866087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_virus.1319</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541087" accession="ERS11143052">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143052</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541087</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_virus.480</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541087</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.6828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_480_length_82966_cov_7.354522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746779_bin.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>79.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_480_length_82966_cov_7.354522</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745831_provirus.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541088" accession="ERS11143053">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143053</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541088</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_provirus.1170</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.20600085628426487</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>57.1279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_1170_length_45040_cov_34.834842_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746785_bin.47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9523809523809524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_1170_length_45040_cov_34.834842_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME252988</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E sp900314705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_provirus.1170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541089" accession="ERS11143054">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143054</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541089</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_provirus.357</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus provirus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541089</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>72.632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_357_length_90052_cov_41.632276_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746785_bin.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.717948717948718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_357_length_90052_cov_41.632276_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738176_provirus.230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541090" accession="ERS11143055">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143055</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541090</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_provirus.948</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.8705</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_948_length_51298_cov_31.329767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746785_bin.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9473684210526316</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_948_length_51298_cov_31.329767_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME270871</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-628;s__CAG-628 sp000438415</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746660_provirus.394</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541091" accession="ERS11143056">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143056</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541091</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.132</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.1266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>370</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_132_length_142112_cov_17.283261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6136363636363636</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_132_length_142112_cov_17.283261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__Faecalibacterium prausnitzii_E</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_virus.132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541093" accession="ERS11143058">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143058</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541093</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.2070</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.08942731277533046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.9883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_2070_length_31484_cov_6.409940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746785_bin.286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>85.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_2070_length_31484_cov_6.409940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_virus.2070</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541094" accession="ERS11143059">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143059</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541094</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.3102</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541094</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.5496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_3102_length_23409_cov_14.291788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7777777777777778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_3102_length_23409_cov_14.291788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002251295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737954_provirus.190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541095" accession="ERS11143060">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143060</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541095</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.4089</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03977380510648237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.77109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_4089_length_18725_cov_5.214768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_4089_length_18725_cov_5.214768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__UBA636;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.3018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541096" accession="ERS11143061">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143061</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541096</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.788</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.4279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>772</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_788_length_56865_cov_6.109618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746785_bin.285</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.64</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4516129032258064</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_788_length_56865_cov_6.109618</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746785_virus.788</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541097" accession="ERS11143062">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143062</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541097</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746793_provirus.730</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746793.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7371</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746793) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_730_length_47772_cov_8.448013_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746793_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_730_length_47772_cov_8.448013_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__CAG-488;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746739_provirus.59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541099" accession="ERS11143064">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143064</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541099</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746793_virus.361</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746793.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541099</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>114.123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746793) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_361_length_76740_cov_69.469614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_361_length_76740_cov_69.469614</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.361</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541100" accession="ERS11143065">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143065</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541100</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746793_virus.768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746793.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.8399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746793) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_768_length_46379_cov_11.585072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_768_length_46379_cov_11.585072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745994_bin.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541101" accession="ERS11143066">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143066</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541101</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746794_provirus.152</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746794.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541101</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.3409</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746794) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561252) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_152_length_88220_cov_28.290528_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746794_bin.48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_152_length_88220_cov_28.290528_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746794_provirus.152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541102" accession="ERS11143067">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143067</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541102</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746794_provirus.37</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746794.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>273.356</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746794) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561252) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_37_length_190686_cov_152.821567_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_37_length_190686_cov_152.821567_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Enterococcaceae;g__Enterococcus_B;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746794_provirus.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541103" accession="ERS11143068">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143068</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541103</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746794_virus.495</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746794.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>59.3649</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746794) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561252) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_495_length_40468_cov_34.565076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9803921568627452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_495_length_40468_cov_34.565076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745561_bin.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746794_virus.495</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541104" accession="ERS11143069">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143069</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541104</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746794_virus.789</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746794.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1611458605941019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541104</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746794) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561252) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_789_length_28349_cov_8.650538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_L_13_1278__NODE_789_length_28349_cov_8.650538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME212098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus lutetiensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746794_virus.789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541105" accession="ERS11143070">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143070</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541105</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_provirus.177</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_177_length_138215_cov_19.959504_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747083_bin.140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_177_length_138215_cov_19.959504_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745556_provirus.188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541106" accession="ERS11143071">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143071</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541106</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_provirus.812</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>37.4467</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_812_length_59303_cov_22.599821_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_812_length_59303_cov_22.599821_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738600_virus.1151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541108" accession="ERS11143073">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143073</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541108</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_virus.1831</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541108</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.3894</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_1831_length_33290_cov_32.906362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_1831_length_33290_cov_32.906362</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738557_bin.224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__Ruminococcus sp900540005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738278_virus.840</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541109" accession="ERS11143074">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143074</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541109</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_virus.2757</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.4683</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_2757_length_24007_cov_20.980025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>67.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_2757_length_24007_cov_20.980025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Coriobacteriaceae;g__Collinsella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738544_provirus.1098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541110" accession="ERS11143075">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143075</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541110</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_virus.411</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_411_length_91184_cov_10.693898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7435897435897436</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_411_length_91184_cov_10.693898</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747083_virus.411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541111" accession="ERS11143076">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143076</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541111</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_virus.791</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.6729066172282728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541111</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>256.783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_791_length_60268_cov_150.714143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5555555555555556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_791_length_60268_cov_150.714143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7737971_virus.634</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541112" accession="ERS11143077">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143077</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541112</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_provirus.1546</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14850677475569002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.2872</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1546_length_44079_cov_20.186537_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1546_length_44079_cov_20.186537_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745989_virus.1090</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541114" accession="ERS11143079">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143079</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541114</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.1068</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1161.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1068_length_56186_cov_644.774439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1068_length_56186_cov_644.774439</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746101_virus.616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541115" accession="ERS11143080">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143080</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541115</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.1355</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541115</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.3928</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1355_length_48380_cov_7.803283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1355_length_48380_cov_7.803283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738936_virus.386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541116" accession="ERS11143081">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143081</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541116</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.1542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1542_length_44143_cov_5.833091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1542_length_44143_cov_5.833091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__MARSEILLE-P3954;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.1542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541117" accession="ERS11143082">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143082</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541117</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.1646</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07747797356828194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>33.4222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1646_length_42180_cov_18.865378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5217391304347826</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1646_length_42180_cov_18.865378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738588_bin.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Archaea;p__Thermoplasmatota;c__Thermoplasmata;o__Methanomassiliicoccales;f__Methanomethylophilaceae;g__UBA71;s__UBA71 sp006954465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.1646</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541118" accession="ERS11143083">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143083</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541118</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.1811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.5599</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1811_length_39650_cov_29.416067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_1811_length_39650_cov_29.416067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME257298</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA738;s__UBA738 sp003522945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.1811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541119" accession="ERS11143084">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143084</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541119</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.2072</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>305.289</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1691</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2072_length_35848_cov_172.035168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2072_length_35848_cov_172.035168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.2072</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541120" accession="ERS11143085">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143085</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541120</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.2455</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Eganvirus virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2455_length_31495_cov_6.054968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9047619047619048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2455_length_31495_cov_6.054968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.2455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Peduovirinae; Eganvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541122" accession="ERS11143087">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143087</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541122</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03828965839962564</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>85.996</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_351_length_104455_cov_47.327627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9727272727272728</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_351_length_104455_cov_47.327627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738603_virus.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541123" accession="ERS11143088">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143088</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541123</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.4280</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541123</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1979</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_4280_length_20149_cov_5.607613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_4280_length_20149_cov_5.607613</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-269;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.4280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541124" accession="ERS11143089">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143089</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541124</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.546</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.0544</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_546_length_82639_cov_31.663925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5306122448979592</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_546_length_82639_cov_31.663925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159485</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541125" accession="ERS11143090">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143090</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541125</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.80</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>55.4086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_80_length_209283_cov_31.299819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4065934065934066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_80_length_209283_cov_31.299819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.80</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541126" accession="ERS11143091">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143091</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541126</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_provirus.246</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.6022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_246_length_97023_cov_19.850494_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_246_length_97023_cov_19.850494_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745687_bin.245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__UBA6382;s__UBA6382 sp900557555</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_provirus.246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541128" accession="ERS11143093">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143093</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541128</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.1118</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.87516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1118_length_41786_cov_5.249514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.66</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8461538461538461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1118_length_41786_cov_5.249514</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738599_bin.122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_virus.1118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541129" accession="ERS11143094">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143094</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541129</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.1386</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1386_length_36395_cov_11.665483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1386_length_36395_cov_11.665483</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_virus.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541130" accession="ERS11143095">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143095</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541130</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.1939</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1045154185022027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541130</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.4878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1939_length_28479_cov_27.527357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747125_bin.165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_1939_length_28479_cov_27.527357</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738259_provirus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541131" accession="ERS11143096">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143096</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541131</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.2873</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09741189427312776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0831</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_2873_length_20916_cov_5.977014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8666666666666667</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_2873_length_20916_cov_5.977014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__UBA1777;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_virus.2873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541132" accession="ERS11143097">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143097</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541132</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.576</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>62.982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_576_length_63011_cov_33.728700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.43243243243243246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_576_length_63011_cov_33.728700</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745845_bin.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541995</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738197_provirus.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541133" accession="ERS11143098">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143098</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541133</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_virus.788</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.1567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_788_length_51946_cov_8.238967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_788_length_51946_cov_8.238967</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738557_virus.1534</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541134" accession="ERS11143099">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143099</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541134</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_provirus.1000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541134</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.4975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1000_length_50962_cov_5.704510_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>88.32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1000_length_50962_cov_5.704510_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738531_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_B;c__Peptococcia;o__Peptococcales;f__Peptococcaceae;g__UMGS1590;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_provirus.931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541135" accession="ERS11143100">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143100</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541135</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_provirus.1619</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.65158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1619_length_36155_cov_5.858168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747128_bin.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>59.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5882352941176471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1619_length_36155_cov_5.858168_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-115;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_provirus.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541137" accession="ERS11143102">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143102</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541137</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_provirus.972</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541137</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.1126</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>461</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_972_length_52224_cov_9.773851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747128_bin.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_972_length_52224_cov_9.773851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME253778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Eubacterium_F;s__Eubacterium_F sp003491505</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738269_provirus.475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541138" accession="ERS11143103">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143103</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541138</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.11921</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.5344</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_11921_length_6576_cov_8.359132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_11921_length_6576_cov_8.359132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747128_virus.11921</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541139" accession="ERS11143104">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143104</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541139</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.1396</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03986784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5549</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1396_length_40116_cov_7.025600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1396_length_40116_cov_7.025600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738587_virus.1800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541140" accession="ERS11143105">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143105</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541140</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.1949</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01497797356828193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541140</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>110.015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1949_length_31406_cov_72.868046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747128_bin.2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_1949_length_31406_cov_72.868046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.455</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541141" accession="ERS11143106">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143106</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541141</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.2948</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.1041</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_2948_length_22111_cov_7.504902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_2948_length_22111_cov_7.504902</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7803603_bin.84</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminococcus;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747128_virus.2948</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540810" accession="ERS11142776">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142776</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540810</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746219_virus.1128</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746219.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540810</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746219) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559398) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_1128_length_62840_cov_33.320029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8775510204081632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586341</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_10_2632__NODE_1128_length_62840_cov_33.320029</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746061_bin.374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746219_virus.1128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540982" accession="ERS11142947">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142947</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540982</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.2037</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>40</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2037_length_53830_cov_101.106822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_2037_length_53830_cov_101.106822</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME181333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900540885</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745759_provirus.941</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540991" accession="ERS11142956">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142956</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540991</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746688_virus.4263</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Streptococcus phage Javan372 virus assembled from ERR7746688.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.0717</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746688) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560553) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_4263_length_32974_cov_12.003952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5208333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_24_1920__NODE_4263_length_32974_cov_12.003952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME001139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Lactobacillales;f__Streptococcaceae;g__Streptococcus;s__Streptococcus parasanguinis_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746688_virus.4263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Streptococcus phage Javan372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13540997" accession="ERS11142962">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142962</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13540997</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746692_provirus.200</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746692.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13540997</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>26.6713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746692) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561431) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_200_length_83059_cov_15.357138_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746692_bin.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585774</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_C-REDO_B_7_1518__NODE_200_length_83059_cov_15.357138_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738555_provirus.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541005" accession="ERS11142970">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142970</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541005</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746721_virus.522</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746721.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541005</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:30Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4502</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746721) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561877) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_522_length_38591_cov_9.269876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS9106181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_7_1572__NODE_522_length_38591_cov_9.269876</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738628_virus.2595</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541011" accession="ERS11142976">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142976</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541011</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746739_provirus.311</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7746739.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12724256706111475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541011</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2079</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746739) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561457) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_311_length_68634_cov_8.958443_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746739_bin.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>81.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585768</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_P_1_1512__NODE_311_length_68634_cov_8.958443_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Clostridiales;f__Clostridiaceae;g__Clostridium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745906_virus.1378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541020" accession="ERS11142985">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142985</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541020</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.1429</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541020</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.2014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1429_length_37733_cov_10.451296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746741_bin.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_1429_length_37733_cov_10.451296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.1429</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541026" accession="ERS11142991">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142991</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541026</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746741_virus.606</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746741.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541026</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.1626</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746741) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561599) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_606_length_61752_cov_11.835638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8611111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_M_8_1180__NODE_606_length_61752_cov_11.835638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.606</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541034" accession="ERS11142999">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11142999</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541034</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746742_virus.611</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746742.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.5553</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746742) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561719) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_611_length_50596_cov_6.861854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-10-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>80.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8918918918918919</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585670</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_J_13_1252__NODE_611_length_50596_cov_6.861854</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746742_virus.611</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541040" accession="ERS11143005">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143005</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541040</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746748_virus.16057</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>926697</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Microviridae</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Microviridae sp. virus assembled from ERR7746748.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02900919059739989</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541040</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>6.89992</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746748) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561606) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_16057_length_5466_cov_3.932269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585580</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_12_1156__NODE_16057_length_5466_cov_3.932269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.16057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; unclassified Microviridae; Microviridae sp.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541049" accession="ERS11143014">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143014</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541049</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.1672</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.0102</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1672_length_37644_cov_5.833524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5909090909090909</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_1672_length_37644_cov_5.833524</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745947_bin.215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Evtepia;s__Evtepia sp004556345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.1672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541055" accession="ERS11143020">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143020</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541055</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746751_virus.675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746751.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25627392670077853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541055</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>159.105</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746751) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561748) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_675_length_72362_cov_94.604372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_23_1060__NODE_675_length_72362_cov_94.604372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746751_virus.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541063" accession="ERS11143028">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143028</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541063</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746757_virus.3538</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746757.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.56616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746757) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561545) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_3538_length_16988_cov_4.557980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.71</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585584</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_E_14_1160__NODE_3538_length_16988_cov_4.557980</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_provirus.3128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541069" accession="ERS11143034">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143034</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541069</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746767_virus.2019</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Butyrivibrio virus Arawn virus assembled from ERR7746767.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05119765791341377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.76377</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746767) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560255) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_2019_length_31120_cov_3.806333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5185185185185185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_H_10_2273__NODE_2019_length_31120_cov_3.806333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745924_provirus.769</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Arawnvirus; Butyrivibrio virus Arawn</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541078" accession="ERS11143043">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143043</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541078</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746773_virus.229</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured UAG-readthrough crAss clade virus assembled from ERR7746773.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.025</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541078</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>226.017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746773) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560047) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_229_length_108652_cov_126.728731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-07-25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586124</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_N_21_2138__NODE_229_length_108652_cov_126.728731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746773_virus.229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; UAG-readthrough crAss clade</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541084" accession="ERS11143049">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143049</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541084</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746779_provirus.907</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7746779.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11693695284855372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541084</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:29Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.3843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746779) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559840) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_907_length_55316_cov_22.351219_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7746779_bin.106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_K_1_2479__NODE_907_length_55316_cov_22.351219_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_virus.1910</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541092" accession="ERS11143057">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143057</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541092</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746785_virus.1723</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7746785.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541092</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.87953</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746785) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561612) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1651</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_1723_length_35537_cov_5.604766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585778</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_N_3_1523__NODE_1723_length_35537_cov_5.604766</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0362748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541098" accession="ERS11143063">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143063</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541098</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7746793_virus.1166</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7746793.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.25702505857542585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0672</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7746793) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561067) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>42</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_1166_length_35452_cov_6.957965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_17_1579__NODE_1166_length_35452_cov_6.957965</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME269197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__KLE1615;s__KLE1615 sp900066985</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.2363</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541107" accession="ERS11143072">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143072</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541107</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747083_virus.1199</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747083.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T01:22:28Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.9905</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747083) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4566884) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>48</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_1199_length_46075_cov_10.831232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_F_17_1567__NODE_1199_length_46075_cov_10.831232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.1185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541113" accession="ERS11143078">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143078</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541113</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_provirus.364</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541113</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.56116</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>45</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_364_length_102141_cov_5.296726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747084_bin.185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.04</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_364_length_102141_cov_5.296726_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738251_virus.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541121" accession="ERS11143086">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143086</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541121</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747084_virus.2946</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747084.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541121</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.7386</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747084) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561533) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2946_length_27344_cov_29.938204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>50.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8181818181818182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_K_14_1158__NODE_2946_length_27344_cov_29.938204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME204972</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900539625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.1159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541127" accession="ERS11143092">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143092</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541127</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747125_provirus.60</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747125.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541127</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>73.0913</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747125) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560347) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_60_length_168389_cov_40.205571_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-05-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586002</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_K_21_1780__NODE_60_length_168389_cov_40.205571_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747125_provirus.60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541136" accession="ERS11143101">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143101</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541136</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_provirus.51</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>504.952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>19</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_51_length_205871_cov_314.071236_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747128_bin.20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_51_length_205871_cov_314.071236_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745625_provirus.431</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541142" accession="ERS11143107">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143107</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541142</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.4303</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541142</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.4561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_4303_length_15742_cov_8.638940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.54</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_4303_length_15742_cov_8.638940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.8114</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541143" accession="ERS11143108">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143108</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541143</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747128_virus.751</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747128.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>196.662</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747128) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559679) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_751_length_61576_cov_119.023968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_L_2_2317__NODE_751_length_61576_cov_119.023968</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746436_virus.373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541144" accession="ERS11143109">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143109</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541144</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_provirus.1094</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.2031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1094_length_57285_cov_14.870770_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1094_length_57285_cov_14.870770_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.1350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541145" accession="ERS11143110">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143110</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541145</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_provirus.168</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_168_length_141384_cov_20.419080_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_168_length_141384_cov_20.419080_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_provirus.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541146" accession="ERS11143111">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143111</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541146</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_provirus.408</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>198.761</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_408_length_95527_cov_122.685186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>44.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6923076923076923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_408_length_95527_cov_122.685186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738223_provirus.260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541147" accession="ERS11143112">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143112</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541147</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_provirus.819</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7524779735682819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541147</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>43.6811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_819_length_67122_cov_26.562070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7941176470588235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_819_length_67122_cov_26.562070_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME140265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia;s__Blautia stercoris</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_provirus.819</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541148" accession="ERS11143113">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143113</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541148</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.1093</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.45473568281938326</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541148</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>46.8722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1093_length_57292_cov_27.921314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1093_length_57292_cov_27.921314</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738191_bin.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp900545245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.1525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541149" accession="ERS11143114">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143114</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541149</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.1278</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10029736837256067</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>710.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1278_length_52324_cov_423.865715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1278_length_52324_cov_423.865715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746411_bin.453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.1278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541150" accession="ERS11143115">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143115</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541150</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.1723</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541150</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.5305</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1723_length_43559_cov_7.065958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5483870967741935</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_1723_length_43559_cov_7.065958</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME251083</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;s__Gemmiger qucibialis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.1723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541151" accession="ERS11143116">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143116</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541151</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.195</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2061</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_195_length_133840_cov_7.993982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6458333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_195_length_133840_cov_7.993982</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541152" accession="ERS11143117">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143117</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541152</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.2223</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541152</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.0798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2223_length_36647_cov_17.472136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747140_bin.350</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2223_length_36647_cov_17.472136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME124777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;s__Dysosmobacter sp001916835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746217_virus.1986</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541153" accession="ERS11143118">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143118</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541153</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.2372</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Vegasvirus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15395275377695644</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>25.4577</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2372_length_35211_cov_15.860135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6129032258064516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2372_length_35211_cov_15.860135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Dorea_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.2372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Gochnauervirinae; Vegasvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541154" accession="ERS11143119">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143119</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541154</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.2776</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0505538234328848</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.2088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2776_length_31776_cov_11.788448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_2776_length_31776_cov_11.788448</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0353882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541155" accession="ERS11143120">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143120</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541155</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>163.627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_324_length_105603_cov_100.295093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.2857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_324_length_105603_cov_100.295093</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541156" accession="ERS11143121">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143121</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541156</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.4156</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.09223568281938328</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>225.359</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_4156_length_24020_cov_241.475045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_4156_length_24020_cov_241.475045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;unk;unk;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.4156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541157" accession="ERS11143122">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143122</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541157</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.517</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.6456</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_517_length_86103_cov_6.784530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_517_length_86103_cov_6.784530</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541158" accession="ERS11143123">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143123</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541158</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.73</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>79.8088</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_73_length_195331_cov_47.029561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7538461538461538</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_73_length_195331_cov_47.029561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541159" accession="ERS11143124">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143124</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541159</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747140_virus.984</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747140.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0777</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747140) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560474) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_984_length_60560_cov_13.123870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5357142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_C_4_1831__NODE_984_length_60560_cov_13.123870</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Lachnospira;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747140_virus.984</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541160" accession="ERS11143125">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143125</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541160</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_provirus.18</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.5381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>60</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_18_length_366877_cov_11.615319_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747255_bin.223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>96.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8846153846153846</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_18_length_366877_cov_11.615319_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738608_bin.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_provirus.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541161" accession="ERS11143126">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143126</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541161</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_provirus.243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses provirus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.325</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541161</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.0748</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_243_length_130216_cov_13.743920_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_243_length_130216_cov_13.743920_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738258_provirus.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541162" accession="ERS11143127">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143127</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541162</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_provirus.600</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae provirus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.019</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_600_length_82223_cov_24.572542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_600_length_82223_cov_24.572542_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_provirus.600</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541163" accession="ERS11143128">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143128</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541163</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.12324</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.4118</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_12324_length_11095_cov_10.781060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_12324_length_11095_cov_10.781060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745582_bin.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.12324</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541164" accession="ERS11143129">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143129</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541164</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.1740</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Autographiviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.8698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_1740_length_44625_cov_13.367895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_1740_length_44625_cov_13.367895</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746398_virus.1478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Autographiviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541165" accession="ERS11143130">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143130</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541165</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.1956</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.2096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_1956_length_41224_cov_20.282038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>70.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_1956_length_41224_cov_20.282038</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.1133</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541166" accession="ERS11143131">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143131</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541166</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.224</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.5914</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_224_length_133540_cov_10.623416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_224_length_133540_cov_10.623416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738580_bin.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__UMGS1668;s__UMGS1668 sp900553955</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746741_virus.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541167" accession="ERS11143132">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143132</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541167</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.2452</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7006</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_2452_length_35771_cov_8.595086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_2452_length_35771_cov_8.595086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__CAG-353;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738629_provirus.570</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541168" accession="ERS11143133">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143133</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541168</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.2923</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.5097</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_2923_length_32170_cov_6.052940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>58.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_2923_length_32170_cov_6.052940</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME098969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia sp900542795</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.2923</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541169" accession="ERS11143134">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143134</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541169</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.3542</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.93317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_3542_length_28385_cov_5.728416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>51.86</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_3542_length_28385_cov_5.728416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738230_bin.158</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Fibrobacterota;c__Fibrobacteria;o__Fibrobacterales;f__Fibrobacteraceae;g__Fibrobacter_A;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745419_virus.1042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541170" accession="ERS11143135">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143135</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541170</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.4176</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.19971</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_4176_length_25136_cov_3.939383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_4176_length_25136_cov_3.939383</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.4176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541171" accession="ERS11143136">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143136</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541171</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.4821</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.52685</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_4821_length_22756_cov_5.668504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>98.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_4821_length_22756_cov_5.668504</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747255_virus.4821</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541172" accession="ERS11143137">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143137</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541172</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747255_virus.833</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747255.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541172</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.7474</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747255) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567690) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_833_length_68788_cov_7.491086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.09</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5116279069767442</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_14_2320__NODE_833_length_68788_cov_7.491086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0380017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541173" accession="ERS11143138">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143138</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541173</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_provirus.1231</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10303179700116676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541173</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>74.3278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>516</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1231_length_44379_cov_46.330685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747322_bin.160</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.06</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1231_length_44379_cov_46.330685_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738566_bin.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Spirochaetota;c__Spirochaetia;o__Treponematales;f__Treponemataceae;g__Treponema_D;s__Treponema_D sp900541945</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_provirus.1231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541174" accession="ERS11143139">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143139</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541174</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_provirus.266</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>184.893</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>47</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_266_length_97570_cov_117.825885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5238095238095238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_266_length_97570_cov_117.825885_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_provirus.266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541175" accession="ERS11143140">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143140</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541175</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_provirus.436</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.7225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_436_length_76659_cov_12.224034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7857142857142857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_436_length_76659_cov_12.224034_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME112856</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp003447235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738554_virus.68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541176" accession="ERS11143141">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143141</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541176</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_provirus.866</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Epona provirus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541176</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2638</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>82</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_866_length_53857_cov_6.632875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_866_length_53857_cov_6.632875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143157</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Clostridium_Q;s__Clostridium_Q sp003024715</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_provirus.866</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Eponavirus; Faecalibacterium virus Epona</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541177" accession="ERS11143142">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143142</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541177</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_virus.1321</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541177</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.65944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>21</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1321_length_42668_cov_4.975699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47619047619047616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1321_length_42668_cov_4.975699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_virus.1510</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541178" accession="ERS11143143">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143143</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541178</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_virus.1944</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541178</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>450.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3475</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1944_length_33064_cov_263.731015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>61.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.782608695652174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_1944_length_33064_cov_263.731015</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME145417</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Enterobacteriaceae;g__Escherichia;s__Escherichia coli</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_virus.1944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541179" accession="ERS11143144">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143144</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541179</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_virus.2754</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.8828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_2754_length_26270_cov_7.079143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_2754_length_26270_cov_7.079143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747322_virus.2754</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541180" accession="ERS11143145">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143145</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541180</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_virus.594</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.7869</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_594_length_65268_cov_6.845500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_594_length_65268_cov_6.845500</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738588_virus.1048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541181" accession="ERS11143146">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143146</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541181</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747322_virus.830</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747322.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541181</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>32.797</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747322) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560430) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_830_length_55031_cov_20.336390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-02</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586023</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_20_1823__NODE_830_length_55031_cov_20.336390</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME128517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900290275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738493_virus.834</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541182" accession="ERS11143147">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143147</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541182</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_provirus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541182</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>99.0223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_112_length_174131_cov_65.079113_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747324_bin.83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8780487804878049</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_112_length_174131_cov_65.079113_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738571_virus.1369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541183" accession="ERS11143148">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143148</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541183</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_provirus.2891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.8617</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2891_length_30274_cov_8.979303_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.87</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8421052631578947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2891_length_30274_cov_8.979303_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME085241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;s__Holdemanella biformis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_provirus.2891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541184" accession="ERS11143149">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143149</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541184</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.10675</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.11996874984972046</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4659</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_10675_length_11896_cov_10.098232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; ITR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_10675_length_11896_cov_10.098232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738570_virus.11112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541185" accession="ERS11143150">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143150</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541185</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.1243</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.7712</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1243_length_52097_cov_12.063149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6341463414634146</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1243_length_52097_cov_12.063149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_virus.808</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541186" accession="ERS11143151">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143151</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541186</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.1464</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541186</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.5194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1464_length_46733_cov_16.586034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1464_length_46733_cov_16.586034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_C;c__Negativicutes;o__Acidaminococcales;f__Acidaminococcaceae;g__Phascolarctobacterium_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738581_virus.1943</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541187" accession="ERS11143152">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143152</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541187</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.1828</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541187</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>108.027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1828_length_40353_cov_69.070191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.08</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_1828_length_40353_cov_69.070191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0359261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541188" accession="ERS11143153">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143153</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541188</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.2036</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.01299472295514512</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541188</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.8273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2036_length_37475_cov_8.703380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.63</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2036_length_37475_cov_8.703380</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.2036</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541189" accession="ERS11143154">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143154</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541189</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.2425</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541189</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>60.5918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2425_length_33712_cov_35.529776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>74.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2425_length_33712_cov_35.529776</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0256792</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541190" accession="ERS11143155">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143155</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541190</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.2757</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541190</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.0506</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>53</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2757_length_31118_cov_7.075062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_2757_length_31118_cov_7.075062</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME159035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp900317525</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0240719</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541191" accession="ERS11143156">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143156</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541191</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.3365</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>340016</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured virus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Roseburia phage Jekyll virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.99452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_3365_length_27154_cov_4.571297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>uncharacterized</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_3365_length_27154_cov_4.571297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738602_bin.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Acetatifactor;s__Acetatifactor sp900554205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738252_provirus.1014</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; unclassified bacterial viruses; Roseburia phage Jekyll</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541192" accession="ERS11143157">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143157</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541192</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.4063</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.14970084156959676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>68</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_4063_length_23780_cov_7.451842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_4063_length_23780_cov_7.451842</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.4063</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541193" accession="ERS11143158">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143158</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541193</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.584</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.8843</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_584_length_80651_cov_18.217787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>87.39</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_584_length_80651_cov_18.217787</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745594_virus.465</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541194" accession="ERS11143159">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143159</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541194</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747324_virus.9746</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7747324.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.02187416669562399</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>48.1867</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747324) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4560480) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_9746_length_12698_cov_27.857460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-06-01</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586022</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_A_18_1822__NODE_9746_length_12698_cov_27.857460</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747324_virus.9746</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541195" accession="ERS11143160">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143160</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541195</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.1397</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.22703565487607813</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.0253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1397_length_52550_cov_7.642464_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4090909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1397_length_52550_cov_7.642464_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME111561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Agathobacter;s__Agathobacter faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_provirus.1397</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541196" accession="ERS11143161">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143161</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541196</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.1768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541196</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>122.669</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>120</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1768_length_45995_cov_75.705932_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.896551724137931</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1768_length_45995_cov_75.705932_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Holdemanella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738559_provirus.1426</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541197" accession="ERS11143162">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143162</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541197</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.233</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:33Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>34.3934</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_233_length_124647_cov_19.942554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_233_length_124647_cov_19.942554_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738586_virus.660</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541198" accession="ERS11143163">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143163</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541198</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.337</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541198</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>52.1149</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_337_length_107053_cov_30.608062_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747394_bin.425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>82.29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_337_length_107053_cov_30.608062_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Anaerovoracaceae;g__VUNA01;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_provirus.337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541199" accession="ERS11143164">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143164</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541199</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13934525602217446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.37412</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_471_length_91379_cov_5.170467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747394_bin.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>57.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9565217391304348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_471_length_91379_cov_5.170467_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738657_bin.225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Monoglobales_A;f__UBA1381;g__RQCD01;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738146_provirus.143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541200" accession="ERS11143165">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143165</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541200</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_provirus.797</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.675</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541200</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7641</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_797_length_71434_cov_6.139258_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_797_length_71434_cov_6.139258_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541201" accession="ERS11143166">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143166</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541201</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.1137</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541201</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.2077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>481</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1137_length_58899_cov_8.704192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>97.18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6451612903225806</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1137_length_58899_cov_8.704192</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745335_virus.1195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541202" accession="ERS11143167">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143167</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541202</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.1447</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541202</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>70.1165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1447_length_51799_cov_40.846526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.631578947368421</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_1447_length_51799_cov_40.846526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738167_virus.348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541203" accession="ERS11143168">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143168</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541203</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.2053</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.10203249223614484</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541203</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.5828</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2053_length_42217_cov_11.298434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.91</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7368421052631579</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2053_length_42217_cov_11.298434</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746757_bin.199</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Elusimicrobiota;c__Elusimicrobia;o__Elusimicrobiales;f__Elusimicrobiaceae;g__UBA1436;s__UBA1436 sp900541355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.1243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541204" accession="ERS11143169">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143169</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541204</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.2615</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.00379359296842034</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541204</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.53292</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>106</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2615_length_36829_cov_5.316228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9285714285714286</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2615_length_36829_cov_5.316228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000676</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;s__Blautia_A massiliensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.2615</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541205" accession="ERS11143170">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143170</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541205</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.2960</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0346637675235751</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.887</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2960_length_34477_cov_12.253401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>93.67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_2960_length_34477_cov_12.253401</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738596_provirus.332</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541206" accession="ERS11143171">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143171</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541206</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.3759</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541206</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.8138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_3759_length_29764_cov_6.032001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_3759_length_29764_cov_6.032001</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Gemmiger;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.3759</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541207" accession="ERS11143172">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143172</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541207</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.4462</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.8878</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_4462_length_26661_cov_22.396329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>75.16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_4462_length_26661_cov_22.396329</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME018576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;s__RC9 sp000433355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746767_virus.2138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541208" accession="ERS11143173">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143173</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541208</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.5031</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_5031_length_24714_cov_4.504242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.33</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9166666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_5031_length_24714_cov_4.504242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.5031</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541209" accession="ERS11143174">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143174</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541209</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747394_virus.7503</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747394.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0024238520115018</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541209</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.9526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747394) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4567788) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>348</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_7503_length_18848_cov_11.782004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>77.03</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7894736842105263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586170</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_PheChl_Fiber-Hadza-Nepal_C_8_2273__NODE_7503_length_18848_cov_11.782004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME196077</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Acutalibacteraceae;g__Ruminococcus_E;s__Ruminococcus_E bromii_B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747394_virus.7503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541210" accession="ERS11143175">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143175</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541210</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_provirus.1165</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>29.2569</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1165_length_72402_cov_34.379551_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8947368421052632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1165_length_72402_cov_34.379551_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_provirus.1165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541211" accession="ERS11143176">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143176</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541211</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_provirus.183</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541211</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>35.6472</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_183_length_189874_cov_20.766082_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747426_bin.236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_183_length_189874_cov_20.766082_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738143_provirus.882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541212" accession="ERS11143177">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143177</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541212</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_provirus.28</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>40.526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_28_length_369314_cov_24.384404_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5818181818181818</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_28_length_369314_cov_24.384404_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738176_bin.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738461_virus.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541213" accession="ERS11143178">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143178</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541213</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_provirus.453</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541213</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>87.0369</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>293</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>132</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_453_length_127034_cov_54.007987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747426_bin.212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.79</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_453_length_127034_cov_54.007987_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Ruminiclostridium_E;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_provirus.207</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541214" accession="ERS11143179">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143179</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541214</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_provirus.939</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541214</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.4627</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>22</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>72</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_939_length_83724_cov_12.728765_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>69.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_939_length_83724_cov_12.728765_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745903_provirus.800</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541215" accession="ERS11143180">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143180</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541215</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.1488</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Oengus virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>818.883</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1488_length_62021_cov_484.274571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9482758620689656</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1488_length_62021_cov_484.274571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738608_virus.1393</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Oengusvirus; Faecalibacterium virus Oengus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541216" accession="ERS11143181">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143181</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541216</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.1764</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07736784140969163</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541216</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1191</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1764_length_54715_cov_5.919287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>65.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_1764_length_54715_cov_5.919287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738239_virus.269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541217" accession="ERS11143182">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143182</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541217</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.210</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541217</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>201.548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_210_length_181367_cov_130.218633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.640625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_210_length_181367_cov_130.218633</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.210</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541218" accession="ERS11143183">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143183</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541218</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.2503</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.925</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541218</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.288</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2503_length_41686_cov_8.790045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2503_length_41686_cov_8.790045</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Blautia_A;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.2503</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541219" accession="ERS11143184">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143184</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541219</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.2731</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541219</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.2622</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2731_length_39316_cov_7.972706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747426_bin.428</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>90.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2731_length_39316_cov_7.972706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Cyanobacteria;c__Vampirovibrionia;o__Gastranaerophilales;f__Gastranaerophilaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747426_virus.2731</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541220" accession="ERS11143185">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143185</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541220</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.2891</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541220</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>38.0535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>25</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2891_length_37697_cov_23.335486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>92.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8275862068965517</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_2891_length_37697_cov_23.335486</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745835_bin.156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Butyricicoccaceae;g__Butyricicoccus_A;s__Butyricicoccus_A sp002395695</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.2122</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541221" accession="ERS11143186">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143186</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541221</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.3678</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.5125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541221</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.81548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_3678_length_30753_cov_5.544074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>91.12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7647058823529411</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_3678_length_30753_cov_5.544074</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Actinobacteriota;c__Coriobacteriia;o__Coriobacteriales;f__Eggerthellaceae;g__Ellagibacter;s__;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738218_virus.1048</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541222" accession="ERS11143187">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143187</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541222</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.4926</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541222</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>41.8144</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>67</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_4926_length_24014_cov_24.985629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_4926_length_24014_cov_24.985629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738179_virus.1723</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541223" accession="ERS11143188">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143188</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541223</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.6494</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Salasmaviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541223</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.8424</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_6494_length_18948_cov_14.767156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.36</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_6494_length_18948_cov_14.767156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747084_virus.4536</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Salasmaviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541224" accession="ERS11143189">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143189</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541224</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747426_virus.9565</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747426.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541224</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.6437</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747426) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559945) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_9565_length_13577_cov_4.467556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>64.26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_I_19_2477__NODE_9565_length_13577_cov_4.467556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__Dysosmobacter;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738624_virus.9966</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541225" accession="ERS11143190">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143190</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541225</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_provirus.1547</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.3625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541225</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>21.9016</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1547_length_51062_cov_13.563872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>94.62</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1547_length_51062_cov_13.563872_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.1032</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541226" accession="ERS11143191">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143191</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541226</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_provirus.2135</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541226</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.93346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2135_length_41072_cov_5.393341_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>72.35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4444444444444444</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2135_length_41072_cov_5.393341_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738957_bin.179</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelotrichaceae;g__Bulleidia;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_provirus.2135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541227" accession="ERS11143192">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143192</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541227</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_provirus.506</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.2969</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_506_length_99069_cov_6.733736_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747522_bin.208</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9130434782608696</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_506_length_99069_cov_6.733736_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746345_bin.167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-460;s__CAG-460 sp000437315</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738187_provirus.645</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541228" accession="ERS11143193">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143193</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541228</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_provirus.979</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541228</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7017</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>83</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>26</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_979_length_68493_cov_8.298848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747522_bin.240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7096774193548387</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_979_length_68493_cov_8.298848_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738553_virus.1446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541229" accession="ERS11143194">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143194</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541229</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.1402</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.827477973568282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541229</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.8028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>629</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1402_length_54192_cov_24.951141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1402_length_54192_cov_24.951141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738651_virus.1381</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541230" accession="ERS11143195">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143195</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541230</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.1749</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541230</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.8713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1749_length_47237_cov_15.041243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8936170212765957</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_1749_length_47237_cov_15.041243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738606_bin.280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__UMGS1217;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.1749</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541231" accession="ERS11143196">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143196</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541231</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.2000</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541231</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.1139</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>52</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2000_length_42890_cov_40.033962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2000_length_42890_cov_40.033962</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746793_virus.873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541232" accession="ERS11143197">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143197</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541232</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.2315</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>11.0379</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2315_length_38673_cov_6.762722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2315_length_38673_cov_6.762722</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME143156</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Faecalibacillus;s__Faecalibacillus faecis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738561_virus.1125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541233" accession="ERS11143198">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143198</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541233</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.2540</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541233</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.7425</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2540_length_36063_cov_6.414911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_2540_length_36063_cov_6.414911</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_provirus.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541234" accession="ERS11143199">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143199</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541234</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.3042</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.17658177410957304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.7346</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_3042_length_31461_cov_8.233463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>56.69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_3042_length_31461_cov_8.233463</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738631_virus.1175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541235" accession="ERS11143200">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143200</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541235</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.4045</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.29686123348017607</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>63.5789</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>227</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_4045_length_25130_cov_38.705903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>60.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_4045_length_25130_cov_38.705903</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738610_provirus.918</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541236" accession="ERS11143201">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143201</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541236</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.516</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAssphage cr8_1 virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12878892628021937</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541236</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>287.337</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_516_length_98777_cov_172.968531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_516_length_98777_cov_172.968531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738515_bin.43</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella copri</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746012_virus.107</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses; crAssphage cr8_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541237" accession="ERS11143202">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143202</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541237</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747522_virus.8526</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured unclassified Astrithrvirus virus assembled from ERR7747522.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541237</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.302</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747522) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4561110) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_8526_length_13475_cov_8.609494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-01-30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_D_19_1578__NODE_8526_length_13475_cov_8.609494</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME116287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia_A;o__Christensenellales;f__Borkfalkiaceae;g__UBA10281;s__UBA10281 sp900767815</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747522_virus.8526</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; Astrithrvirus; unclassified Astrithrvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541238" accession="ERS11143203">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143203</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541238</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_provirus.1252</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lagaffe provirus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541238</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.418</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>372</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>59</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_1252_length_51154_cov_10.070462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>99.1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9692307692307692</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_1252_length_51154_cov_10.070462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0372952</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Lagaffevirus; Faecalibacterium virus Lagaffe</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541239" accession="ERS11143204">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143204</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541239</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_provirus.1752</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541239</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>1887.37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_1752_length_39660_cov_1368.283986_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.74</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_1752_length_39660_cov_1368.283986_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME108259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp900313215</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738604_provirus.763</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541240" accession="ERS11143205">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143205</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541240</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_provirus.523</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Taranis provirus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541240</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>222.384</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>453</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_523_length_90767_cov_140.001875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7297297297297297</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_523_length_90767_cov_140.001875_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738264_virus.805</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Taranisvirus; Faecalibacterium virus Taranis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541241" accession="ERS11143206">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143206</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541241</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.112</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Spbetavirus virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541241</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>77.2983</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_112_length_191100_cov_48.351183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>35</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>20</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7534246575342466</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_112_length_191100_cov_48.351183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Spbetavirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541242" accession="ERS11143207">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143207</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541242</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.16257</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1478972</TAXON_ID>
      <SCIENTIFIC_NAME>Gokushovirinae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Chlamydiamicrovirus virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541242</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>39.5313</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_16257_length_5529_cov_26.178283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>ssDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_16257_length_5529_cov_26.178283</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.16257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Monodnaviria; Sangervirae; Phixviricota; Malgrandaviricetes; Petitvirales; Microviridae; Gokushovirinae; Chlamydiamicrovirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541243" accession="ERS11143208">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143208</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541243</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.2052</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Lugh virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.825</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541243</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>15.0699</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_2052_length_35085_cov_8.990374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_2052_length_35085_cov_8.990374</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.2052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae; Lughvirus; Faecalibacterium virus Lugh</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541244" accession="ERS11143209">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143209</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541244</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.2632</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541244</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>7.02321</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>166</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_2632_length_28627_cov_4.180175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.44</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7333333333333333</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_2632_length_28627_cov_4.180175</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Faecalibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0347707</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541245" accession="ERS11143210">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143210</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541245</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.471</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541245</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>13.4212</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>32</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_471_length_96134_cov_8.434711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.51</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9772727272727272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_471_length_96134_cov_8.434711</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Anaerobutyricum;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747615_virus.471</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541246" accession="ERS11143211">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143211</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541246</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747615_virus.743</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7747615.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.725</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541246</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>9.51713</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747615) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559937) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>195</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_743_length_73507_cov_6.020809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6792452830188679</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-I-L_M_19_2499__NODE_743_length_73507_cov_6.020809</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Intestinibacter;s__Intestinibacter bartlettii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745819_virus.561</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541247" accession="ERS11143212">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143212</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541247</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_provirus.1166</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.4375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>294.416</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>317</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_1166_length_67350_cov_187.902918_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5517241379310345</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_1166_length_67350_cov_187.902918_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738270_virus.391</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541248" accession="ERS11143213">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143213</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541248</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_provirus.1855</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.24217479228380095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541248</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>27.8632</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>531</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_1855_length_48494_cov_16.851808_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.57</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8888888888888888</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_1855_length_48494_cov_16.851808_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Coprococcus;s__Coprococcus eutactus_A</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_provirus.1855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541249" accession="ERS11143214">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143214</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541249</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_provirus.2857</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541249</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:34Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>228.881</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>30</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2857_length_34638_cov_128.357918_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>68.85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2857_length_34638_cov_128.357918_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME142596</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotellamassilia;s__Prevotellamassilia timonensis</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_provirus.2857</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541250" accession="ERS11143215">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143215</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541250</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_provirus.503</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.06486784140969165</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541250</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>10.1117</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_503_length_110883_cov_5.634189_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747782_bin.194</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>84.76</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4615384615384616</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_503_length_110883_cov_5.634189_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__Roseburia;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738275_provirus.24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541251" accession="ERS11143216">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143216</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541251</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_provirus.96</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.18368843659633977</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541251</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.4069</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>28</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_96_length_236029_cov_48.849851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747782_bin.151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>89.96</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_96_length_236029_cov_48.849851_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__TANB77;f__CAG-508;g__CAG-245;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738274_virus.1718</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541252" accession="ERS11143217">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143217</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541252</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.162</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Podoviridae virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.03878438135477076</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541252</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>19.4446</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_162_length_190958_cov_10.868400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.48148148148148145</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_162_length_190958_cov_10.868400</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7747782_virus.162</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541253" accession="ERS11143218">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143218</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541253</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.209</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.2375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541253</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>86.6653</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_209_length_172865_cov_50.527027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_209_length_172865_cov_50.527027</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738957_virus.128</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541254" accession="ERS11143219">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143219</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541254</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.2446</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.07619765791341378</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541254</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>761.603</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>4</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2446_length_39534_cov_436.002129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747782_bin.274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8571428571428571</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2446_length_39534_cov_436.002129</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745291_bin.408</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Paludibacteraceae;g__RF16;s__RF16 sp900556095</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738235_virus.1304</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541255" accession="ERS11143220">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143220</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541255</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.2807</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1405180011700873</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541255</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>283.735</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>85</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2807_length_35265_cov_166.036035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>52.41</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_2807_length_35265_cov_166.036035</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745423_virus.493</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541256" accession="ERS11143221">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143221</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541256</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.3626</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0752082847671273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541256</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>89.8275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_3626_length_28496_cov_51.834582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7747782_bin.141</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>63.73</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8823529411764706</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_3626_length_28496_cov_51.834582</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738256_bin.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp000436915</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745994_virus.2075</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541257" accession="ERS11143222">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143222</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541257</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7747782_virus.576</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>1701671</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured podovirus</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured crAss-like viruses virus assembled from ERR7747782.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541257</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>14.5678</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7747782) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (ERZ4559595) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>69</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_576_length_104147_cov_8.097310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>metaSPAdes;3.13.1;-k 21,33,55,77 --merged BBMerge.fq.gz -1 R1.fq.gz -2 R2.fq.gz</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2014-08-12</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>18</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9078947368421052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2586180</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-E-H_J_6_2308__NODE_576_length_104147_cov_8.097310</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__UBA932;g__RC9;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738601_virus.368</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Podoviridae; crAss-like viruses</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541258" accession="ERS11143223">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143223</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541258</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.1079219</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Faecalibacterium virus Mushu provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541258</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>8.22694</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>24</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>698</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1079219__flag_1__multi_6.0000__len_28462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>71.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8695652173913043</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1079219__flag_1__multi_6.0000__len_28462_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0272278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Mushuvirus; Faecalibacterium virus Mushu</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541259" accession="ERS11143224">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143224</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541259</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.1818991</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541259</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>68.6673</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>171</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1818991__flag_0__multi_50.9916__len_119818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6363636363636364</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1818991__flag_0__multi_50.9916__len_119818_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;unk;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_provirus.1818991</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541260" accession="ERS11143225">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143225</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541260</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.24103</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541260</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>182.827</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>15</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>88</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_24103__flag_0__multi_137.3275__len_25186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>55.38</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8333333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_24103__flag_0__multi_137.3275__len_25186_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__Erysipelotrichales;f__Erysipelatoclostridiaceae;g__Catenibacterium;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738247_virus.1820</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541261" accession="ERS11143226">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143226</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541261</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.2832454</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Caudovirales provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>84.3587</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>110</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2832454__flag_0__multi_77.8954__len_59234_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>95.92</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2832454__flag_0__multi_77.8954__len_59234_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0345548</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541262" accession="ERS11143227">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143227</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541262</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.3136066</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541262</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>23.6496</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3136066__flag_0__multi_18.0000__len_235671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7803603_bin.389</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.11</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4090909090909091</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3136066__flag_0__multi_18.0000__len_235671_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME039567</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-632;s__CAG-632 sp900539185</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_provirus.3136066</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541263" accession="ERS11143228">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143228</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541263</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.3347351</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2100421</TAXON_ID>
      <SCIENTIFIC_NAME>uncultured Caudovirales phage</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Bacteroides phage F2 provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541263</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>174.589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3347351__flag_0__multi_120.9793__len_58066_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7803603_bin.457</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>73.81</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3347351__flag_0__multi_120.9793__len_58066_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME271908</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Bacteroidota;c__Bacteroidia;o__Bacteroidales;f__Bacteroidaceae;g__Prevotella;s__Prevotella sp002265625</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_provirus.3347351</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Bacteroides phage F2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541264" accession="ERS11143229">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143229</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541264</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_provirus.482770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae provirus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541264</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>49.9589</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>296</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>159</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_482770__flag_0__multi_38.9846__len_103861_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7803603_bin.340</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>provirus (UpViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_482770__flag_0__multi_38.9846__len_103861_1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0226686</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541265" accession="ERS11143230">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143230</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541265</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.103624</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541265</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>265.234</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>34</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_103624__flag_3__multi_198.0000__len_38807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.47368421052631576</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_103624__flag_3__multi_198.0000__len_38807</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738227_bin.197</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__Angelakisella;s__Angelakisella sp004557855</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.103624</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541266" accession="ERS11143231">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143231</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541266</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.1141585</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541266</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>12.4926</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1141585__flag_0__multi_8.9916__len_19174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>53.17</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9230769230769232</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1141585__flag_0__multi_8.9916__len_19174</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7746111_bin.94</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Ruminococcaceae;g__;s__</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1141585</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541267" accession="ERS11143232">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143232</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541267</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.1344811</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541267</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:08Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>254.299</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1344811__flag_3__multi_192.0000__len_270575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>9</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.4929577464788733</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1344811__flag_3__multi_192.0000__len_270575</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME000247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Peptostreptococcales;f__Peptostreptococcaceae;g__Intestinibacter;s__Intestinibacter bartlettii</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1344811</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541268" accession="ERS11143233">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143233</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541268</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.1536052</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.95</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541268</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>16.042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>10</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1536052__flag_1__multi_12.0000__len_38164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.84375</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1536052__flag_1__multi_12.0000__len_38164</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1536052</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541269" accession="ERS11143234">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143234</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541269</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.1732044</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541269</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>18.3138</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>14</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>37</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1732044__flag_0__multi_13.9968__len_41042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7803603_bin.452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8518518518518519</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_1732044__flag_0__multi_13.9968__len_41042</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256853</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes;c__Bacilli;o__RF39;f__UBA660;g__CAG-302;s__CAG-302 sp001916775</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.1732044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541270" accession="ERS11143235">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143235</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541270</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.2020044</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Muvirus virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.13945538818076478</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541270</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>2258.6</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>98</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>99</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2020044__flag_0__multi_1662.0461__len_36355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>ERR7803603_bin.559</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; HMM-based (lower-bound)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.46</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.9545454545454546</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2020044__flag_0__multi_1662.0461__len_36355</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7745836_bin.70</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Proteobacteria;c__Gammaproteobacteria;o__Enterobacterales;f__Succinivibrionaceae;g__Succinivibrio;s__Succinivibrio sp000431835</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.2020044</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae; Muvirus</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541271" accession="ERS11143236">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143236</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541271</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.2262605</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05481795254560153</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541271</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>56.8169</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>131</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2262605__flag_0__multi_38.9811__len_40247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>83.23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.75</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2262605__flag_0__multi_38.9811__len_40247</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME096004</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Desulfobacterota;c__Desulfovibrionia;o__Desulfovibrionales;f__Desulfovibrionaceae;g__Desulfovibrio;s__Desulfovibrio piger</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745687_virus.1060</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541272" accession="ERS11143237">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143237</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541272</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.2619768</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.0125</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541272</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>172.891</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>29</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2619768__flag_0__multi_108.3187__len_20057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.49</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7272727272727273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_2619768__flag_0__multi_108.3187__len_20057</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-170;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_virus.1648</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541273" accession="ERS11143238">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143238</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541273</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.281093</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541273</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>189.193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_281093__flag_1__multi_141.0000__len_50028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6521739130434783</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_281093__flag_1__multi_141.0000__len_50028</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7738202_virus.338</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541274" accession="ERS11143239">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143239</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541274</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.3005186</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.05</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541274</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>128.135</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3005186__flag_1__multi_97.0000__len_272373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8771929824561403</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3005186__flag_1__multi_97.0000__len_272373</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7746748_virus.31</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541275" accession="ERS11143240">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143240</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541275</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.311282</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.12085444902106184</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541275</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>51.2947</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>16</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_311282__flag_0__multi_38.3615__len_30799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>76.97</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6111111111111112</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_311282__flag_0__multi_38.3615__len_30799</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME176098</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;s__CAG-83 sp000435975</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.311282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541276" accession="ERS11143241">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143241</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541276</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.3275206</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.1881217983453865</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541276</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>365.535</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3275206__flag_0__multi_269.8721__len_21151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>54.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3275206__flag_0__multi_269.8721__len_21151</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7745816_virus.1542</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541277" accession="ERS11143242">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143242</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541277</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.3460096</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541277</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>98.4193</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>2</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>136</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3460096__flag_0__multi_64.8468__len_23944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>66.65</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6153846153846154</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3460096__flag_0__multi_64.8468__len_23944</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>ERR7738937_bin.183</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__ER4;s__ER4 sp000765235</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3460096</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541278" accession="ERS11143243">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143243</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541278</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.3571498</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>336724</TAXON_ID>
      <SCIENTIFIC_NAME>Myoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Myoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.7</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541278</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>20.5452</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>295</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>205</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3571498__flag_3__multi_16.0004__len_45109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.8529411764705882</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3571498__flag_3__multi_16.0004__len_45109</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;unk;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>MGV-GENOME-0354261</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Myoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541279" accession="ERS11143244">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143244</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541279</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.3571527</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541279</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>24.9167</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3571527__flag_3__multi_20.0005__len_41556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7391304347826086</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_3571527__flag_3__multi_20.0005__len_41556</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.3571527</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541280" accession="ERS11143245">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143245</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541280</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.471886</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541280</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>17.4798</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>13</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_471886__flag_1__multi_12.9903__len_39802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (medium-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>86.61</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>1</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.7142857142857143</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_471886__flag_1__multi_12.9903__len_39802</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>GUT_GENOME256714</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;g__CAG-95;s__CAG-95 sp000438155</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.471886</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541281" accession="ERS11143246">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143246</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541281</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.751770</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.9875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541281</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T04:22:35Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>22.168</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>736</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_751770__flag_0__multi_17.0000__len_36743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Many fragments with little to no review of assembly other than reporting of standard assembly statistics.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; AAI-based (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>62.3</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.5833333333333334</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_751770__flag_0__multi_17.0000__len_36743</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Lachnospirales;f__Lachnospiraceae;unk;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.751770</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
  <SAMPLE alias="SAMEA13541282" accession="ERS11143247">
    <IDENTIFIERS>
      <PRIMARY_ID>ERS11143247</PRIMARY_ID>
      <EXTERNAL_ID namespace="BioSample">SAMEA13541282</EXTERNAL_ID>
    </IDENTIFIERS>
    <TITLE>ERR7803603_virus.856916</TITLE>
    <SAMPLE_NAME>
      <TAXON_ID>2133760</TAXON_ID>
      <SCIENTIFIC_NAME>Siphoviridae environmental samples</SCIENTIFIC_NAME>
    </SAMPLE_NAME>
    <DESCRIPTION>This sample represents an uncultured Siphoviridae virus assembled from ERR7803603.</DESCRIPTION>
    <SAMPLE_ATTRIBUTES>
      <SAMPLE_ATTRIBUTE>
        <TAG>BACPHLIP temperate score</TAG>
        <VALUE>0.8875</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-FIRST-PUBLIC</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>ENA-LAST-UPDATE</TAG>
        <VALUE>2022-12-27</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>External Id</TAG>
        <VALUE>SAMEA13541282</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center alias</TAG>
        <VALUE>Stanford University School of Medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC center name</TAG>
        <VALUE>stanford university school of medicine</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC first public</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC last update</TAG>
        <VALUE>2022-12-27T19:09:07Z</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>INSDC status</TAG>
        <VALUE>public</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage depth</TAG>
        <VALUE>65.7051</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>MAG coverage software</TAG>
        <VALUE>Reads (ERR7803603) were merged using BBMerge (rem k=62 extend2=50 ecct vstrict) and then mapped to assembled contigs &gt;=1500bp (NA) using Bowtie2 v2.3.4 (--very-sensitive -X 1000). Coverage was calculated using jgi_summarize_bam_contig_depths (default parameters) from MetaBAT2 v2.15.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of BLAST hits supporting host taxonomy</TAG>
        <VALUE>23</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Number of CRISPR spacer hits supporting host taxonomy</TAG>
        <VALUE>119</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Submitter Id</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_856916__flag_0__multi_47.0000__len_42879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>Viral contig found in MAG</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly quality</TAG>
        <VALUE>Multiple fragments where gaps span repetitive regions. Presence of the 23S, 16S and 5S rRNA genes and at least 18 tRNAs.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>assembly software</TAG>
        <VALUE>MEGAHIT;1.2.8;--meta-large</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>collection_date</TAG>
        <VALUE>2013-09-07</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness approach</TAG>
        <VALUE>CheckV; DTR (high-confidence)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness score</TAG>
        <VALUE>100</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>completeness software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end, database v1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>detection type</TAG>
        <VALUE>independent sequence (UViG)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (biome)</TAG>
        <VALUE>terrestrial biome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (feature)</TAG>
        <VALUE>human-associated habitat</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>environment (material)</TAG>
        <VALUE>feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>feature prediction</TAG>
        <VALUE>Prodigal;2.6.3;meta</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (country and/or sea)</TAG>
        <VALUE>Tanzania</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (elevation)</TAG>
        <VALUE>1287</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (latitude)</TAG>
        <VALUE>-3.55</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>geographic location (longitude)</TAG>
        <VALUE>-34.58</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>host prediction approach</TAG>
        <VALUE>CRISPR spacer matches (PILER-CR;v1.06 or CRT;v1.2; blastn;2.10.1+ -dust no -word_size 18, allowing &lt;=1 gap or mismatch over 95% of spacer length) and &gt;=1kb+&gt;=96% identity BLAST hits (hs-blastn max_target_seqs 25000 -perc_identity 96 -evalue 1e-180) were used to identify connections between each viral contig and all non-viral contigs in the UHGG v1 (286,997 genomes) and this study (54,779 genomes). The predicted host for each viral contig is the lowest taxonomic rank with &gt;70% agreement among all CRISPR+BLAST connections, as described previously (https://doi.org/10.1038/s41564-021-00928-6).</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>investigation type</TAG>
        <VALUE>uncultivated viral genomes</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>isolation_source</TAG>
        <VALUE>human feces</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>metagenomic source</TAG>
        <VALUE>human gut metagenome</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>nucleic acid extraction</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>number of unique standard tRNAs extracted</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>percent of BLAST hits supporting viral taxonomy</TAG>
        <VALUE>0.6666666666666666</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome structure</TAG>
        <VALUE>non-segmented</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>predicted genome type</TAG>
        <VALUE>dsDNA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>project name</TAG>
        <VALUE>Human gut UViGs</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference database(s)</TAG>
        <VALUE>NCBI GenBank downloaded using INPHARED v1.6 (27 Feb 2022)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>reference for biomaterial</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample collection device or method</TAG>
        <VALUE>https://doi.org/10.1126/science.aan4834; https://doi.org/10.1080/19490976.2018.1494103</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample derived from</TAG>
        <VALUE>ERS2585552</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sample name</TAG>
        <VALUE>Hadza_MoBio_hadza-A-D_C_6_1128__k127_856916__flag_0__multi_47.0000__len_42879</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>sequencing method</TAG>
        <VALUE>Illumina NovaSeq 6000</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>similarity search method</TAG>
        <VALUE>diamond;2.0.9;--sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>source of UViGs</TAG>
        <VALUE>metagenome (not viral targeted)</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>species-level group of predicted host</TAG>
        <VALUE>NA</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>tRNA extraction software</TAG>
        <VALUE>tRNAscan-SE;2.0.9;-B</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomic classification</TAG>
        <VALUE>Taxonomic predictions from VPF-Class (https://github.com/biocom-uib/vpf-tools, c8422bc, default params) and from DIAMOND (v2.0.9, --sensitive -k 10 --query-cover 50 --subject-cover 50 --evalue 1e-5) vs NCBI GenBank database compiled using INPHARED v1.6 (run 27 Feb 2022) were aggregated and the lowest agreeing taxonomy was used.</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>taxonomy of predicted host</TAG>
        <VALUE>d__Bacteria;p__Firmicutes_A;c__Clostridia;o__Oscillospirales;f__Oscillospiraceae;g__CAG-83;unk;unk</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU classification approach</TAG>
        <VALUE>95% ANI;85% AF;greedy, centroid-based</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU database</TAG>
        <VALUE>MGV (https://doi.org/10.1038/s41564-021-00928-6);1.0</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU representative</TAG>
        <VALUE>ERR7803603_virus.856916</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>vOTU sequence comparison approach</TAG>
        <VALUE>blastn;2.10.1+;-max_target_seqs 25000 -perc_identity 90</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral identification software</TAG>
        <VALUE>CheckV;0.8.1;end-to-end,&gt;=medium-quality; MGV_viral_detection(https://github.com/snayfach/MGV);95c2d4d;defaults</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>viral taxonomy</TAG>
        <VALUE>Viruses; Duplodnaviria; Heunggongvirae; Uroviricota; Caudoviricetes; Caudovirales; Siphoviridae</VALUE>
      </SAMPLE_ATTRIBUTE>
      <SAMPLE_ATTRIBUTE>
        <TAG>virus enrichment approach</TAG>
        <VALUE>none</VALUE>
      </SAMPLE_ATTRIBUTE>
    </SAMPLE_ATTRIBUTES>
  </SAMPLE>
</SAMPLE_SET>
